Bug 1470125 - Update webrender to commit cdfaaeb5f74e416f39af1081c9a676c752d23896. r=Gankro draft
author Kartikaya Gupta <kgupta@mozilla.com>
Thu, 28 Jun 2018 11:48:27 -0400
changeset 812182 654102de6773960af67069e7e4c2b27474d9a04c
parent 811977 6041c030780420b6205cf2d6640513606609884c
child 812183 c2543e9925efbdd0249d8f6e306f62455e12ac48
push id 114479
push user kgupta@mozilla.com
push date Thu, 28 Jun 2018 18:33:43 +0000
reviewers Gankro
bugs 1470125
milestone 63.0a1
Bug 1470125 - Update webrender to commit cdfaaeb5f74e416f39af1081c9a676c752d23896. r=Gankro Includes regenerated webrender_ffi_generated.h header. MozReview-Commit-ID: I3lvkCH5IGz
gfx/webrender/Cargo.toml
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_linear_gradient.glsl
gfx/webrender/res/brush_radial_gradient.glsl
gfx/webrender/res/clip_scroll.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_box_shadow.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_line.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_split_composite.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/snap.glsl
gfx/webrender/res/transform.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device.rs
gfx/webrender/src/device/gl.rs
gfx/webrender/src/device/mod.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene_builder.rs
gfx/webrender/src/shade.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/color.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_api/src/image.rs
gfx/webrender_bindings/revision.txt
gfx/webrender_bindings/webrender_ffi_generated.h
gfx/wrench/Cargo.toml
gfx/wrench/src/args.yaml
gfx/wrench/src/main.rs
gfx/wrench/src/wrench.rs
gfx/wrench/src/yaml_frame_reader.rs
gfx/wrench/src/yaml_frame_writer.rs
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -22,22 +22,22 @@ app_units = "0.6"
 base64 = { optional = true, version = "0.6" }
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.0"
 cfg-if = "0.1.2"
 euclid = "0.17.3"
 fxhash = "0.2.1"
 gleam = "0.5"
-image = { optional = true, version = "0.18" }
+image = { optional = true, version = "0.19" }
 lazy_static = "1"
 log = "0.4"
 num-traits = "0.1.43"
 plane-split = "0.9.1"
-png = { optional = true, version = "0.11" }
+png = { optional = true, version = "0.12" }
 rayon = "1"
 ron = { optional = true, version = "0.1.7" }
 serde = { optional = true, version = "1.0", features = ["serde_derive"] }
 serde_json = { optional = true, version = "1.0" }
 smallvec = "0.6"
 thread_profiler = "0.1.1"
 time = "0.1"
 webrender_api = {path = "../webrender_api"}
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -11,137 +11,79 @@ void brush_vs(
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 segment_data
 );
 
-#define VECS_PER_BRUSH_PRIM                 2
 #define VECS_PER_SEGMENT                    2
 
 #define BRUSH_FLAG_PERSPECTIVE_INTERPOLATION    1
 #define BRUSH_FLAG_SEGMENT_RELATIVE             2
 #define BRUSH_FLAG_SEGMENT_REPEAT_X             4
 #define BRUSH_FLAG_SEGMENT_REPEAT_Y             8
 
-//Note: these have to match `gpu_types` constants
-#define INT_BITS    (31)
-#define CLIP_CHAIN_RECT_BITS    (22)
-#define SEGMENT_BITS (INT_BITS - CLIP_CHAIN_RECT_BITS)
-#define EDGE_FLAG_BITS (4)
-#define BRUSH_FLAG_BITS (4)
-#define CLIP_SCROLL_INDEX_BITS (INT_BITS - EDGE_FLAG_BITS - BRUSH_FLAG_BITS)
-
-struct BrushInstance {
-    int picture_address;
-    int prim_address;
-    int clip_chain_rect_index;
-    int scroll_node_id;
-    int clip_address;
-    int z;
-    int segment_index;
-    int edge_mask;
-    int flags;
-    ivec3 user_data;
-};
-
-BrushInstance load_brush() {
-    BrushInstance bi;
-
-    bi.picture_address = aData0.x & 0xffff;
-    bi.clip_address = aData0.x >> 16;
-    bi.prim_address = aData0.y;
-    bi.clip_chain_rect_index = aData0.z  & ((1 << CLIP_CHAIN_RECT_BITS) - 1);
-    bi.segment_index = aData0.z >> CLIP_CHAIN_RECT_BITS;
-    bi.z = aData0.w;
-    bi.scroll_node_id = aData1.x & ((1 << CLIP_SCROLL_INDEX_BITS) - 1);
-    bi.edge_mask = (aData1.x >> CLIP_SCROLL_INDEX_BITS) & 0xf;
-    bi.flags = (aData1.x >> (CLIP_SCROLL_INDEX_BITS + EDGE_FLAG_BITS)) & 0xf;
-    bi.user_data = aData1.yzw;
-
-    return bi;
-}
-
-struct BrushPrimitive {
-    RectWithSize local_rect;
-    RectWithSize local_clip_rect;
-};
-
-BrushPrimitive fetch_brush_primitive(int address, int clip_chain_rect_index) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-
-    RectWithSize clip_chain_rect = fetch_clip_chain_rect(clip_chain_rect_index);
-    RectWithSize brush_clip_rect = RectWithSize(data[1].xy, data[1].zw);
-    RectWithSize clip_rect = intersect_rects(clip_chain_rect, brush_clip_rect);
-
-    BrushPrimitive prim = BrushPrimitive(RectWithSize(data[0].xy, data[0].zw), clip_rect);
-
-    return prim;
-}
-
 void main(void) {
     // Load the brush instance from vertex attributes.
-    BrushInstance brush = load_brush();
-
-    // Load the geometry for this brush. For now, this is simply the
-    // local rect of the primitive. In the future, this will support
-    // loading segment rects, and other rect formats (glyphs).
-    BrushPrimitive brush_prim =
-        fetch_brush_primitive(brush.prim_address, brush.clip_chain_rect_index);
+    int prim_header_address = aData.x;
+    int clip_address = aData.y;
+    int segment_index = aData.z & 0xffff;
+    int edge_flags = (aData.z >> 16) & 0xff;
+    int brush_flags = (aData.z >> 24) & 0xff;
+    PrimitiveHeader ph = fetch_prim_header(prim_header_address);
 
     // Fetch the segment of this brush primitive we are drawing.
-    int segment_address = brush.prim_address +
-                          VECS_PER_BRUSH_PRIM +
+    int segment_address = ph.specific_prim_address +
                           VECS_PER_SPECIFIC_BRUSH +
-                          brush.segment_index * VECS_PER_SEGMENT;
+                          segment_index * VECS_PER_SEGMENT;
 
     vec4[2] segment_data = fetch_from_resource_cache_2(segment_address);
     RectWithSize local_segment_rect = RectWithSize(segment_data[0].xy, segment_data[0].zw);
 
     VertexInfo vi;
 
     // Fetch the dynamic picture that we are drawing on.
-    PictureTask pic_task = fetch_picture_task(brush.picture_address);
-    ClipArea clip_area = fetch_clip_area(brush.clip_address);
+    PictureTask pic_task = fetch_picture_task(ph.render_task_index);
+    ClipArea clip_area = fetch_clip_area(clip_address);
 
-    ClipScrollNode scroll_node = fetch_clip_scroll_node(brush.scroll_node_id);
+    Transform transform = fetch_transform(ph.transform_id);
 
     // Write the normal vertex information out.
-    if (scroll_node.is_axis_aligned) {
+    if (transform.is_axis_aligned) {
         vi = write_vertex(
             local_segment_rect,
-            brush_prim.local_clip_rect,
-            float(brush.z),
-            scroll_node,
+            ph.local_clip_rect,
+            ph.z,
+            transform,
             pic_task,
-            brush_prim.local_rect
+            ph.local_rect
         );
 
         // TODO(gw): transform bounds may be referenced by
         //           the fragment shader when running in
         //           the alpha pass, even on non-transformed
         //           items. For now, just ensure it has no
         //           effect. We can tidy this up as we move
         //           more items to be brush shaders.
 #ifdef WR_FEATURE_ALPHA_PASS
         init_transform_vs(vec4(vec2(-1000000.0), vec2(1000000.0)));
 #endif
     } else {
-        bvec4 edge_mask = notEqual(brush.edge_mask & ivec4(1, 2, 4, 8), ivec4(0));
-        bool do_perspective_interpolation = (brush.flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0;
+        bvec4 edge_mask = notEqual(edge_flags & ivec4(1, 2, 4, 8), ivec4(0));
+        bool do_perspective_interpolation = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0;
 
         vi = write_transform_vertex(
             local_segment_rect,
-            brush_prim.local_rect,
-            brush_prim.local_clip_rect,
+            ph.local_rect,
+            ph.local_clip_rect,
             mix(vec4(0.0), vec4(1.0), edge_mask),
-            float(brush.z),
-            scroll_node,
+            ph.z,
+            transform,
             pic_task,
             do_perspective_interpolation
         );
     }
 
     // For brush instances in the alpha pass, always write
     // out clip information.
     // TODO(gw): It's possible that we might want alpha
@@ -153,23 +95,23 @@ void main(void) {
         vi.screen_pos,
         clip_area
     );
 #endif
 
     // Run the specific brush VS code to write interpolators.
     brush_vs(
         vi,
-        brush.prim_address + VECS_PER_BRUSH_PRIM,
-        brush_prim.local_rect,
+        ph.specific_prim_address,
+        ph.local_rect,
         local_segment_rect,
-        brush.user_data,
-        scroll_node.transform,
+        ph.user_data,
+        transform.m,
         pic_task,
-        brush.flags,
+        brush_flags,
         segment_data[1]
     );
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 struct Fragment {
--- a/gfx/webrender/res/brush_linear_gradient.glsl
+++ b/gfx/webrender/res/brush_linear_gradient.glsl
@@ -44,21 +44,26 @@ void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
-    vec4 unused
+    vec4 texel_rect
 ) {
     Gradient gradient = fetch_gradient(prim_address);
 
-    vPos = vi.local_pos - local_rect.p0;
+    if ((brush_flags & BRUSH_FLAG_SEGMENT_RELATIVE) != 0) {
+        vPos = (vi.local_pos - segment_rect.p0) / segment_rect.size;
+        vPos = vPos * (texel_rect.zw - texel_rect.xy) + texel_rect.xy;
+    } else {
+        vPos = vi.local_pos - local_rect.p0;
+    }
 
     vec2 start_point = gradient.start_end_point.xy;
     vec2 end_point = gradient.start_end_point.zw;
     vec2 dir = end_point - start_point;
 
     vStartPoint = start_point;
     vScaledDir = dir / dot(dir, dir);
 
--- a/gfx/webrender/res/brush_radial_gradient.glsl
+++ b/gfx/webrender/res/brush_radial_gradient.glsl
@@ -44,21 +44,26 @@ void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
-    vec4 unused
+    vec4 texel_rect
 ) {
     RadialGradient gradient = fetch_radial_gradient(prim_address);
 
-    vPos = vi.local_pos - local_rect.p0;
+    if ((brush_flags & BRUSH_FLAG_SEGMENT_RELATIVE) != 0) {
+        vPos = (vi.local_pos - segment_rect.p0) / segment_rect.size;
+        vPos = vPos * (texel_rect.zw - texel_rect.xy) + texel_rect.xy;
+    } else {
+        vPos = vi.local_pos - local_rect.p0;
+    }
 
     vCenter = gradient.center_start_end_radius.xy;
     vStartRadius = gradient.center_start_end_radius.z;
     vEndRadius = gradient.center_start_end_radius.w;
 
     // Transform all coordinates by the y scale so the
     // fragment shader can work with circles
     vec2 tile_repeat = local_rect.size / gradient.stretch_size;
deleted file mode 100644
--- a/gfx/webrender/res/clip_scroll.glsl
+++ /dev/null
@@ -1,88 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifdef WR_VERTEX_SHADER
-#define VECS_PER_CLIP_SCROLL_NODE   9
-
-uniform HIGHP_SAMPLER_FLOAT sampler2D sClipScrollNodes;
-
-struct ClipScrollNode {
-    mat4 transform;
-    mat4 inv_transform;
-    bool is_axis_aligned;
-};
-
-ClipScrollNode fetch_clip_scroll_node(int index) {
-    ClipScrollNode node;
-
-    // Create a UV base coord for each 8 texels.
-    // This is required because trying to use an offset
-    // of more than 8 texels doesn't work on some versions
-    // of OSX.
-    ivec2 uv = get_fetch_uv(index, VECS_PER_CLIP_SCROLL_NODE);
-    ivec2 uv0 = ivec2(uv.x + 0, uv.y);
-    ivec2 uv1 = ivec2(uv.x + 8, uv.y);
-
-    node.transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(0, 0));
-    node.transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(1, 0));
-    node.transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(2, 0));
-    node.transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(3, 0));
-
-    node.inv_transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(4, 0));
-    node.inv_transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(5, 0));
-    node.inv_transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(6, 0));
-    node.inv_transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(7, 0));
-
-    vec4 misc = TEXEL_FETCH(sClipScrollNodes, uv1, 0, ivec2(0, 0));
-    node.is_axis_aligned = misc.x == 0.0;
-
-    return node;
-}
-
-// Return the intersection of the plane (set up by "normal" and "point")
-// with the ray (set up by "ray_origin" and "ray_dir"),
-// writing the resulting scaler into "t".
-bool ray_plane(vec3 normal, vec3 pt, vec3 ray_origin, vec3 ray_dir, out float t)
-{
-    float denom = dot(normal, ray_dir);
-    if (abs(denom) > 1e-6) {
-        vec3 d = pt - ray_origin;
-        t = dot(d, normal) / denom;
-        return t >= 0.0;
-    }
-
-    return false;
-}
-
-// Apply the inverse transform "inv_transform"
-// to the reference point "ref" in CSS space,
-// producing a local point on a ClipScrollNode plane,
-// set by a base point "a" and a normal "n".
-vec4 untransform(vec2 ref, vec3 n, vec3 a, mat4 inv_transform) {
-    vec3 p = vec3(ref, -10000.0);
-    vec3 d = vec3(0, 0, 1.0);
-
-    float t = 0.0;
-    // get an intersection of the ClipScrollNode plane with Z axis vector,
-    // originated from the "ref" point
-    ray_plane(n, a, p, d, t);
-    float z = p.z + d.z * t; // Z of the visible point on the ClipScrollNode
-
-    vec4 r = inv_transform * vec4(ref, z, 1.0);
-    return r;
-}
-
-// Given a CSS space position, transform it back into the ClipScrollNode space.
-vec4 get_node_pos(vec2 pos, ClipScrollNode node) {
-    // get a point on the scroll node plane
-    vec4 ah = node.transform * vec4(0.0, 0.0, 0.0, 1.0);
-    vec3 a = ah.xyz / ah.w;
-
-    // get the normal to the scroll node plane
-    vec3 n = transpose(mat3(node.inv_transform)) * vec3(0.0, 0.0, 1.0);
-    return untransform(pos, n, a, node.inv_transform);
-}
-
-#endif //WR_VERTEX_SHADER
-
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -1,40 +1,40 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,clip_scroll,render_task,resource_cache,snap,transform
+#include rect,render_task,resource_cache,snap,transform
 
 #ifdef WR_VERTEX_SHADER
 
 #define SEGMENT_ALL         0
 #define SEGMENT_CORNER_TL   1
 #define SEGMENT_CORNER_TR   2
 #define SEGMENT_CORNER_BL   3
 #define SEGMENT_CORNER_BR   4
 
 in int aClipRenderTaskAddress;
 in int aScrollNodeId;
 in int aClipSegment;
 in ivec4 aClipDataResourceAddress;
 
 struct ClipMaskInstance {
     int render_task_address;
-    int scroll_node_id;
+    int transform_id;
     int segment;
     ivec2 clip_data_address;
     ivec2 resource_address;
 };
 
 ClipMaskInstance fetch_clip_item() {
     ClipMaskInstance cmi;
 
     cmi.render_task_address = aClipRenderTaskAddress;
-    cmi.scroll_node_id = aScrollNodeId;
+    cmi.transform_id = aScrollNodeId;
     cmi.segment = aClipSegment;
     cmi.clip_data_address = aClipDataResourceAddress.xy;
     cmi.resource_address = aClipDataResourceAddress.zw;
 
     return cmi;
 }
 
 struct ClipVertexInfo {
@@ -46,47 +46,47 @@ struct ClipVertexInfo {
 RectWithSize intersect_rect(RectWithSize a, RectWithSize b) {
     vec4 p = clamp(vec4(a.p0, a.p0 + a.size), b.p0.xyxy, b.p0.xyxy + b.size.xyxy);
     return RectWithSize(p.xy, max(vec2(0.0), p.zw - p.xy));
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 ClipVertexInfo write_clip_tile_vertex(RectWithSize local_clip_rect,
-                                      ClipScrollNode scroll_node,
+                                      Transform transform,
                                       ClipArea area) {
     vec2 device_pos = area.screen_origin + aPosition.xy * area.common_data.task_rect.size;
     vec2 actual_pos = device_pos;
 
-    if (scroll_node.is_axis_aligned) {
+    if (transform.is_axis_aligned) {
         vec4 snap_positions = compute_snap_positions(
-            scroll_node.transform,
+            transform.m,
             local_clip_rect
         );
 
         vec2 snap_offsets = compute_snap_offset_impl(
             device_pos,
-            scroll_node.transform,
+            transform.m,
             local_clip_rect,
             RectWithSize(snap_positions.xy, snap_positions.zw - snap_positions.xy),
             snap_positions,
             vec2(0.5)
         );
 
         actual_pos -= snap_offsets;
     }
 
     vec4 node_pos;
 
     // Select the local position, based on whether we are rasterizing this
     // clip mask in local- or sccreen-space.
     if (area.local_space) {
         node_pos = vec4(actual_pos / uDevicePixelRatio, 0.0, 1.0);
     } else {
-        node_pos = get_node_pos(actual_pos / uDevicePixelRatio, scroll_node);
+        node_pos = get_node_pos(actual_pos / uDevicePixelRatio, transform);
     }
 
     // compute the point position inside the scroll node, in CSS space
     vec2 vertex_pos = device_pos +
                       area.common_data.task_rect.p0 -
                       area.screen_origin;
 
     gl_Position = uTransform * vec4(vertex_pos, 0.0, 1);
--- a/gfx/webrender/res/cs_clip_box_shadow.glsl
+++ b/gfx/webrender/res/cs_clip_box_shadow.glsl
@@ -36,22 +36,22 @@ BoxShadowData fetch_data(ivec2 address) 
         dest_rect
     );
     return bs_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
+    Transform transform = fetch_transform(cmi.transform_id);
     BoxShadowData bs_data = fetch_data(cmi.clip_data_address);
     ImageResource res = fetch_image_resource_direct(cmi.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(bs_data.dest_rect,
-                                               scroll_node,
+                                               transform,
                                                area);
 
     vLayer = res.layer;
     vPos = vi.local_pos;
     vClipMode = bs_data.clip_mode;
 
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -21,23 +21,23 @@ ImageMaskData fetch_mask_data(ivec2 addr
     RectWithSize local_rect = RectWithSize(data.xy, data.zw);
     ImageMaskData mask_data = ImageMaskData(local_rect);
     return mask_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
+    Transform transform = fetch_transform(cmi.transform_id);
     ImageMaskData mask = fetch_mask_data(cmi.clip_data_address);
     RectWithSize local_rect = mask.local_rect;
     ImageResource res = fetch_image_resource_direct(cmi.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
-                                               scroll_node,
+                                               transform,
                                                area);
 
     vPos = vi.local_pos;
     vLayer = res.layer;
 
     vClipMaskImageUv = vec3((vPos.xy / vPos.z - local_rect.p0) / local_rect.size, 0.0);
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vClipMaskUvRect = vec4(res.uv_rect.p0, res.uv_rect.p1 - res.uv_rect.p0) / texture_size.xyxy;
--- a/gfx/webrender/res/cs_clip_line.glsl
+++ b/gfx/webrender/res/cs_clip_line.glsl
@@ -38,21 +38,21 @@ LineDecorationData fetch_data(ivec2 addr
         data[1].z
     );
     return line_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
+    Transform transform = fetch_transform(cmi.transform_id);
     LineDecorationData data = fetch_data(cmi.clip_data_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(data.local_rect,
-                                               scroll_node,
+                                               transform,
                                                area);
 
 
     vLocalPos = vi.local_pos;
 
     vec2 pos, size;
 
     switch (int(data.orientation)) {
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -55,21 +55,21 @@ ClipData fetch_clip(ivec2 address) {
     clip.bottom_right = fetch_clip_corner(address, 3.0);
 
     return clip;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
+    Transform transform = fetch_transform(cmi.transform_id);
     ClipData clip = fetch_clip(cmi.clip_data_address);
     RectWithSize local_rect = clip.rect.rect;
 
-    ClipVertexInfo vi = write_clip_tile_vertex(local_rect, scroll_node, area);
+    ClipVertexInfo vi = write_clip_tile_vertex(local_rect, transform, area);
     vPos = vi.local_pos;
 
     vClipMode = clip.rect.mode.x;
 
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_rect);
 
     vec2 r_tl = clip.top_left.outer_inner_radius.xy;
     vec2 r_tr = clip.top_right.outer_inner_radius.xy;
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,clip_scroll,render_task,resource_cache,snap,transform
+#include rect,render_task,resource_cache,snap,transform
 
 #define EXTEND_MODE_CLAMP  0
 #define EXTEND_MODE_REPEAT 1
 
 #define SUBPX_DIR_NONE        0
 #define SUBPX_DIR_HORIZONTAL  1
 #define SUBPX_DIR_VERTICAL    2
 #define SUBPX_DIR_MIXED       3
@@ -27,73 +27,98 @@ vec2 clamp_rect(vec2 pt, RectWithSize re
 
 // TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
 flat varying vec4 vClipMaskUvBounds;
 varying vec3 vClipMaskUv;
 
 
 #ifdef WR_VERTEX_SHADER
 
-#define VECS_PER_LOCAL_CLIP_RECT    1
-#define VECS_PER_PRIM_HEADER        2
-#define VECS_PER_TEXT_RUN           3
-#define VECS_PER_GRADIENT_STOP      2
-
 #define COLOR_MODE_FROM_PASS          0
 #define COLOR_MODE_ALPHA              1
 #define COLOR_MODE_SUBPX_CONST_COLOR  2
 #define COLOR_MODE_SUBPX_BG_PASS0     3
 #define COLOR_MODE_SUBPX_BG_PASS1     4
 #define COLOR_MODE_SUBPX_BG_PASS2     5
 #define COLOR_MODE_SUBPX_DUAL_SOURCE  6
 #define COLOR_MODE_BITMAP             7
 #define COLOR_MODE_COLOR_BITMAP       8
 
-uniform HIGHP_SAMPLER_FLOAT sampler2D sLocalClipRects;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sPrimitiveHeadersF;
+uniform HIGHP_SAMPLER_FLOAT isampler2D sPrimitiveHeadersI;
 
 // Instanced attributes
-in ivec4 aData0;
-in ivec4 aData1;
+in ivec4 aData;
+
+#define VECS_PER_PRIM_HEADER_F 2
+#define VECS_PER_PRIM_HEADER_I 2
+
+struct PrimitiveHeader {
+    RectWithSize local_rect;
+    RectWithSize local_clip_rect;
+    float z;
+    int specific_prim_address;
+    int render_task_index;
+    int clip_task_index;
+    int transform_id;
+    ivec3 user_data;
+};
 
-RectWithSize fetch_clip_chain_rect(int index) {
-    ivec2 uv = get_fetch_uv(index, VECS_PER_LOCAL_CLIP_RECT);
-    vec4 rect = TEXEL_FETCH(sLocalClipRects, uv, 0, ivec2(0, 0));
-    return RectWithSize(rect.xy, rect.zw);
+PrimitiveHeader fetch_prim_header(int index) {
+    PrimitiveHeader ph;
+
+    ivec2 uv_f = get_fetch_uv(index, VECS_PER_PRIM_HEADER_F);
+    vec4 local_rect = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(0, 0));
+    vec4 local_clip_rect = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(1, 0));
+    ph.local_rect = RectWithSize(local_rect.xy, local_rect.zw);
+    ph.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
+
+    ivec2 uv_i = get_fetch_uv(index, VECS_PER_PRIM_HEADER_I);
+    ivec4 data0 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(0, 0));
+    ivec4 data1 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(1, 0));
+    ph.z = float(data0.x);
+    ph.render_task_index = data0.y;
+    ph.specific_prim_address = data0.z;
+    ph.clip_task_index = data0.w;
+    ph.transform_id = data1.x;
+    ph.user_data = data1.yzw;
+
+    return ph;
 }
 
 struct VertexInfo {
     vec2 local_pos;
     vec2 screen_pos;
     float w;
     vec2 snapped_device_pos;
 };
 
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
-                        ClipScrollNode scroll_node,
+                        Transform transform,
                         PictureTask task,
                         RectWithSize snap_rect) {
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
     // Clamp to the two local clip rects.
     vec2 clamped_local_pos = clamp_rect(local_pos, local_clip_rect);
 
     /// Compute the snapping offset.
     vec2 snap_offset = compute_snap_offset(
         clamped_local_pos,
-        scroll_node.transform,
+        transform.m,
         snap_rect,
         vec2(0.5)
     );
 
     // Transform the current vertex to world space.
-    vec4 world_pos = scroll_node.transform * vec4(clamped_local_pos, 0.0, 1.0);
+    vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 snapped_device_pos = device_pos + snap_offset;
     vec2 final_pos = snapped_device_pos -
                      task.content_origin +
@@ -130,17 +155,17 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
     return vec2(nx / d, ny / d);
 }
 
 VertexInfo write_transform_vertex(RectWithSize local_segment_rect,
                                   RectWithSize local_prim_rect,
                                   RectWithSize local_clip_rect,
                                   vec4 clip_edge_mask,
                                   float z,
-                                  ClipScrollNode scroll_node,
+                                  Transform transform,
                                   PictureTask task,
                                   bool do_perspective_interpolation) {
     // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_clip_rect);
     RectWithEndpoint segment_rect = to_rect_with_endpoint(local_segment_rect);
     segment_rect.p0 = clamp(segment_rect.p0, clip_rect.p0, clip_rect.p1);
     segment_rect.p1 = clamp(segment_rect.p1, clip_rect.p0, clip_rect.p1);
 
@@ -163,17 +188,17 @@ VertexInfo write_transform_vertex(RectWi
     vec4 extrude_distance = vec4(extrude_amount) * clip_edge_mask;
     local_segment_rect.p0 -= extrude_distance.xy;
     local_segment_rect.size += extrude_distance.xy + extrude_distance.zw;
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = local_segment_rect.p0 + local_segment_rect.size * aPosition.xy;
 
     // Transform the current vertex to the world cpace.
-    vec4 world_pos = scroll_node.transform * vec4(local_pos, 0.0, 1.0);
+    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
     vec2 task_offset = task.common_data.task_rect.p0 - task.content_origin;
 
     // Force w = 1, if we don't want perspective interpolation (for
     // example, drawing a screen-space quad on an element with a
     // perspective transform).
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -39,20 +39,20 @@ struct SplitCompositeInstance {
     int src_task_index;
     int polygons_address;
     float z;
 };
 
 SplitCompositeInstance fetch_composite_instance() {
     SplitCompositeInstance ci;
 
-    ci.render_task_index = aData0.x;
-    ci.src_task_index = aData0.y;
-    ci.polygons_address = aData0.z;
-    ci.z = float(aData0.w);
+    ci.render_task_index = aData.x;
+    ci.src_task_index = aData.y;
+    ci.polygons_address = aData.z;
+    ci.z = float(aData.w);
 
     return ci;
 }
 
 void main(void) {
     SplitCompositeInstance ci = fetch_composite_instance();
     SplitGeometry geometry = fetch_split_geometry(ci.polygons_address);
     PictureTask src_task = fetch_picture_task(ci.src_task_index);
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -10,16 +10,18 @@ flat varying vec4 vUvBorder;
 flat varying vec2 vMaskSwizzle;
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
 varying vec4 vUvClip;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 
+#define VECS_PER_TEXT_RUN           3
+
 struct Glyph {
     vec2 offset;
 };
 
 Glyph fetch_glyph(int specific_prim_address,
                   int glyph_index) {
     // Two glyphs are packed in each texel in the GPU cache.
     int glyph_address = specific_prim_address +
@@ -52,137 +54,59 @@ struct TextRun {
     vec2 offset;
 };
 
 TextRun fetch_text_run(int address) {
     vec4 data[3] = fetch_from_resource_cache_3(address);
     return TextRun(data[0], data[1], data[2].xy);
 }
 
-struct PrimitiveInstance {
-    int prim_address;
-    int specific_prim_address;
-    int render_task_index;
-    int clip_task_index;
-    int scroll_node_id;
-    int clip_chain_rect_index;
-    int z;
-    int user_data0;
-    int user_data1;
-    int user_data2;
-};
-
-PrimitiveInstance fetch_prim_instance() {
-    PrimitiveInstance pi;
-
-    pi.prim_address = aData0.x;
-    pi.specific_prim_address = pi.prim_address + VECS_PER_PRIM_HEADER;
-    pi.render_task_index = aData0.y % 0x10000;
-    pi.clip_task_index = aData0.y / 0x10000;
-    pi.clip_chain_rect_index = aData0.z;
-    pi.scroll_node_id = aData0.w;
-    pi.z = aData1.x;
-    pi.user_data0 = aData1.y;
-    pi.user_data1 = aData1.z;
-    pi.user_data2 = aData1.w;
-
-    return pi;
-}
-
-struct Primitive {
-    ClipScrollNode scroll_node;
-    ClipArea clip_area;
-    PictureTask task;
-    RectWithSize local_rect;
-    RectWithSize local_clip_rect;
-    int specific_prim_address;
-    int user_data0;
-    int user_data1;
-    int user_data2;
-    float z;
-};
-
-struct PrimitiveGeometry {
-    RectWithSize local_rect;
-    RectWithSize local_clip_rect;
-};
-
-PrimitiveGeometry fetch_primitive_geometry(int address) {
-    vec4 geom[2] = fetch_from_resource_cache_2(address);
-    return PrimitiveGeometry(RectWithSize(geom[0].xy, geom[0].zw),
-                             RectWithSize(geom[1].xy, geom[1].zw));
-}
-
-Primitive load_primitive() {
-    PrimitiveInstance pi = fetch_prim_instance();
-
-    Primitive prim;
-
-    prim.scroll_node = fetch_clip_scroll_node(pi.scroll_node_id);
-    prim.clip_area = fetch_clip_area(pi.clip_task_index);
-    prim.task = fetch_picture_task(pi.render_task_index);
-
-    RectWithSize clip_chain_rect = fetch_clip_chain_rect(pi.clip_chain_rect_index);
-
-    PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
-    prim.local_rect = geom.local_rect;
-    prim.local_clip_rect = intersect_rects(clip_chain_rect, geom.local_clip_rect);
-
-    prim.specific_prim_address = pi.specific_prim_address;
-    prim.user_data0 = pi.user_data0;
-    prim.user_data1 = pi.user_data1;
-    prim.user_data2 = pi.user_data2;
-    prim.z = float(pi.z);
-
-    return prim;
-}
-
 VertexInfo write_text_vertex(vec2 clamped_local_pos,
                              RectWithSize local_clip_rect,
                              float z,
-                             ClipScrollNode scroll_node,
+                             Transform transform,
                              PictureTask task,
                              vec2 text_offset,
                              RectWithSize snap_rect,
                              vec2 snap_bias) {
     // Transform the current vertex to world space.
-    vec4 world_pos = scroll_node.transform * vec4(clamped_local_pos, 0.0, 1.0);
+    vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     float device_scale = uDevicePixelRatio / world_pos.w;
     vec2 device_pos = world_pos.xy * device_scale;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos -
                      task.content_origin +
                      task.common_data.task_rect.p0;
 
 #if defined(WR_FEATURE_GLYPH_TRANSFORM)
     bool remove_subpx_offset = true;
 #else
     // Compute the snapping offset only if the scroll node transform is axis-aligned.
-    bool remove_subpx_offset = scroll_node.is_axis_aligned;
+    bool remove_subpx_offset = transform.is_axis_aligned;
 #endif
     if (remove_subpx_offset) {
         // Ensure the transformed text offset does not contain a subpixel translation
         // such that glyph snapping is stable for equivalent glyph subpixel positions.
-        vec2 world_text_offset = mat2(scroll_node.transform) * text_offset;
-        vec2 device_text_pos = (scroll_node.transform[3].xy + world_text_offset) * device_scale;
+        vec2 world_text_offset = mat2(transform.m) * text_offset;
+        vec2 device_text_pos = (transform.m[3].xy + world_text_offset) * device_scale;
         final_pos += floor(device_text_pos + 0.5) - device_text_pos;
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
         // For transformed subpixels, we just need to align the glyph origin to a device pixel.
         // The transformed text offset has already been snapped, so remove it from the glyph
         // origin when snapping the glyph.
         vec2 snap_offset = snap_rect.p0 - world_text_offset * device_scale;
         final_pos += floor(snap_offset + snap_bias) - snap_offset;
 #else
         // The transformed text offset has already been snapped, so remove it from the transform
         // when snapping the glyph.
-        mat4 snap_transform = scroll_node.transform;
+        mat4 snap_transform = transform.m;
         snap_transform[3].xy = -world_text_offset;
         final_pos += compute_snap_offset(
             clamped_local_pos,
             snap_transform,
             snap_rect,
             snap_bias
         );
 #endif
@@ -196,65 +120,71 @@ VertexInfo write_text_vertex(vec2 clampe
         world_pos.w,
         final_pos
     );
 
     return vi;
 }
 
 void main(void) {
-    Primitive prim = load_primitive();
-    TextRun text = fetch_text_run(prim.specific_prim_address);
+    int prim_header_address = aData.x;
+    int glyph_index = aData.y;
+    int resource_address = aData.z;
+    int subpx_dir = aData.w >> 16;
+    int color_mode = aData.w & 0xffff;
 
-    int glyph_index = prim.user_data0;
-    int resource_address = prim.user_data1;
-    int subpx_dir = prim.user_data2 >> 16;
-    int color_mode = prim.user_data2 & 0xffff;
+    PrimitiveHeader ph = fetch_prim_header(prim_header_address);
+
+    Transform transform = fetch_transform(ph.transform_id);
+    ClipArea clip_area = fetch_clip_area(ph.clip_task_index);
+    PictureTask task = fetch_picture_task(ph.render_task_index);
+
+    TextRun text = fetch_text_run(ph.specific_prim_address);
 
     if (color_mode == COLOR_MODE_FROM_PASS) {
         color_mode = uMode;
     }
 
-    Glyph glyph = fetch_glyph(prim.specific_prim_address, glyph_index);
+    Glyph glyph = fetch_glyph(ph.specific_prim_address, glyph_index);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     // Transform from local space to glyph space.
-    mat2 transform = mat2(prim.scroll_node.transform) * uDevicePixelRatio;
+    mat2 glyph_transform = mat2(transform.m) * uDevicePixelRatio;
 
     // Compute the glyph rect in glyph space.
-    RectWithSize glyph_rect = RectWithSize(res.offset + transform * (text.offset + glyph.offset),
+    RectWithSize glyph_rect = RectWithSize(res.offset + glyph_transform * (text.offset + glyph.offset),
                                            res.uv_rect.zw - res.uv_rect.xy);
 
     // Transform the glyph rect back to local space.
-    mat2 inv = inverse(transform);
+    mat2 inv = inverse(glyph_transform);
     RectWithSize local_rect = transform_rect(glyph_rect, inv);
 
     // Select the corner of the glyph's local space rect that we are processing.
     vec2 local_pos = local_rect.p0 + local_rect.size * aPosition.xy;
 
     // If the glyph's local rect would fit inside the local clip rect, then select a corner from
     // the device space glyph rect to reduce overdraw of clipped pixels in the fragment shader.
     // Otherwise, fall back to clamping the glyph's local rect to the local clip rect.
-    local_pos = rect_inside_rect(local_rect, prim.local_clip_rect) ?
+    local_pos = rect_inside_rect(local_rect, ph.local_clip_rect) ?
                     inv * (glyph_rect.p0 + glyph_rect.size * aPosition.xy) :
-                    clamp_rect(local_pos, prim.local_clip_rect);
+                    clamp_rect(local_pos, ph.local_clip_rect);
 #else
     // Scale from glyph space to local space.
     float scale = res.scale / uDevicePixelRatio;
 
     // Compute the glyph rect in local space.
     RectWithSize glyph_rect = RectWithSize(scale * res.offset + text.offset + glyph.offset,
                                            scale * (res.uv_rect.zw - res.uv_rect.xy));
 
     // Select the corner of the glyph rect that we are processing.
     vec2 local_pos = glyph_rect.p0 + glyph_rect.size * aPosition.xy;
 
     // Clamp to the local clip rect.
-    local_pos = clamp_rect(local_pos, prim.local_clip_rect);
+    local_pos = clamp_rect(local_pos, ph.local_clip_rect);
 #endif
 
     vec2 snap_bias;
     // In subpixel mode, the subpixel offset has already been
     // accounted for while rasterizing the glyph. However, we
     // must still round with a subpixel bias rather than rounding
     // to the nearest whole pixel, depending on subpixel direciton.
     switch (subpx_dir) {
@@ -273,32 +203,32 @@ void main(void) {
             snap_bias = vec2(0.5, 0.125);
             break;
         case SUBPX_DIR_MIXED:
             snap_bias = vec2(0.125);
             break;
     }
 
     VertexInfo vi = write_text_vertex(local_pos,
-                                      prim.local_clip_rect,
-                                      prim.z,
-                                      prim.scroll_node,
-                                      prim.task,
+                                      ph.local_clip_rect,
+                                      ph.z,
+                                      transform,
+                                      task,
                                       text.offset,
                                       glyph_rect,
                                       snap_bias);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
-    vec2 f = (transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
+    vec2 f = (glyph_transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
     vUvClip = vec4(f, 1.0 - f);
 #else
     vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
 #endif
 
-    write_clip(vi.screen_pos, prim.clip_area);
+    write_clip(vi.screen_pos, clip_area);
 
     switch (color_mode) {
         case COLOR_MODE_ALPHA:
         case COLOR_MODE_BITMAP:
             vMaskSwizzle = vec2(0.0, 1.0);
             vColor = text.color;
             break;
         case COLOR_MODE_SUBPX_BG_PASS2:
--- a/gfx/webrender/res/snap.glsl
+++ b/gfx/webrender/res/snap.glsl
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifdef WR_VERTEX_SHADER
 
 vec4 compute_snap_positions(mat4 transform, RectWithSize snap_rect) {
     // Ensure that the snap rect is at *least* one device pixel in size.
     // TODO(gw): It's not clear to me that this is "correct". Specifically,
     //           how should it interact with sub-pixel snap rects when there
-    //           is a scroll_node transform with scale present? But it does fix
+    //           is a transform with scale present? But it does fix
     //           the test cases we have in Servo that are failing without it
     //           and seem better than not having this at all.
     snap_rect.size = max(snap_rect.size, vec2(1.0 / uDevicePixelRatio));
 
     // Transform the snap corners to the world space.
     vec4 world_snap_p0 = transform * vec4(snap_rect.p0, 0.0, 1.0);
     vec4 world_snap_p1 = transform * vec4(snap_rect.p0 + snap_rect.size, 0.0, 1.0);
     // Snap bounds in world coordinates, adjusted for pixel ratio. XY = top left, ZW = bottom right
@@ -36,17 +36,17 @@ vec2 compute_snap_offset_impl(
     /// Compute the position of this vertex inside the snap rectangle.
     vec2 normalized_snap_pos = (reference_pos - reference_rect.p0) / reference_rect.size;
 
     /// Compute the actual world offset for this vertex needed to make it snap.
     return mix(snap_offsets.xy, snap_offsets.zw, normalized_snap_pos);
 }
 
 // Compute a snapping offset in world space (adjusted to pixel ratio),
-// given local position on the scroll_node and a snap rectangle.
+// given local position on the transform and a snap rectangle.
 vec2 compute_snap_offset(vec2 local_pos,
                          mat4 transform,
                          RectWithSize snap_rect,
                          vec2 snap_bias) {
     vec4 snap_positions = compute_snap_positions(
         transform,
         snap_rect
     );
--- a/gfx/webrender/res/transform.glsl
+++ b/gfx/webrender/res/transform.glsl
@@ -1,20 +1,99 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 flat varying vec4 vTransformBounds;
 
 #ifdef WR_VERTEX_SHADER
 
+#define VECS_PER_TRANSFORM   8
+uniform HIGHP_SAMPLER_FLOAT sampler2D sTransformPalette;
+
 void init_transform_vs(vec4 local_bounds) {
     vTransformBounds = local_bounds;
 }
 
+struct Transform {
+    mat4 m;
+    mat4 inv_m;
+    bool is_axis_aligned;
+};
+
+Transform fetch_transform(int id) {
+    Transform transform;
+
+    transform.is_axis_aligned = (id >> 24) == 0;
+    int index = id & 0x00ffffff;
+
+    // Create a UV base coord for each 8 texels.
+    // This is required because trying to use an offset
+    // of more than 8 texels doesn't work on some versions
+    // of OSX.
+    ivec2 uv = get_fetch_uv(index, VECS_PER_TRANSFORM);
+    ivec2 uv0 = ivec2(uv.x + 0, uv.y);
+
+    transform.m[0] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(0, 0));
+    transform.m[1] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(1, 0));
+    transform.m[2] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(2, 0));
+    transform.m[3] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(3, 0));
+
+    transform.inv_m[0] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(4, 0));
+    transform.inv_m[1] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(5, 0));
+    transform.inv_m[2] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(6, 0));
+    transform.inv_m[3] = TEXEL_FETCH(sTransformPalette, uv0, 0, ivec2(7, 0));
+
+    return transform;
+}
+
+// Intersect the plane (set up by "normal" and "pt") with the ray (set up by
+// "ray_origin" and "ray_dir"), writing the resulting scalar into "t". Returns
+// true iff t >= 0; a ray (nearly) parallel to the plane yields t = 0 and false.
+bool ray_plane(vec3 normal, vec3 pt, vec3 ray_origin, vec3 ray_dir, out float t)
+{
+    float denom = dot(normal, ray_dir);
+    if (abs(denom) > 1e-6) {
+        vec3 d = pt - ray_origin;
+        t = dot(d, normal) / denom;
+        return t >= 0.0;
+    }
+    t = 0.0; // an unwritten "out" parameter is undefined in the caller after return
+    return false;
+}
+
+// Apply the inverse transform "inv_transform"
+// to the reference point "ref" in CSS space,
+// producing a local point on a Transform plane,
+// set by a base point "a" and a normal "n".
+vec4 untransform(vec2 ref, vec3 n, vec3 a, mat4 inv_transform) {
+    vec3 p = vec3(ref, -10000.0);
+    vec3 d = vec3(0, 0, 1.0);
+
+    float t = 0.0;
+    // get an intersection of the Transform plane with Z axis vector,
+    // originated from the "ref" point
+    ray_plane(n, a, p, d, t);
+    float z = p.z + d.z * t; // Z of the visible point on the Transform
+
+    vec4 r = inv_transform * vec4(ref, z, 1.0);
+    return r;
+}
+
+// Given a CSS space position, transform it back into the Transform space.
+vec4 get_node_pos(vec2 pos, Transform transform) {
+    // get a point on the transform plane (the local origin mapped through transform.m)
+    vec4 ah = transform.m * vec4(0.0, 0.0, 0.0, 1.0);
+    vec3 a = ah.xyz / ah.w;
+
+    // get the normal to the transform plane (local +Z through the inverse-transpose)
+    vec3 n = transpose(mat3(transform.inv_m)) * vec3(0.0, 0.0, 1.0);
+    return untransform(pos, n, a, transform.inv_m);
+}
+
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
 float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
     vec2 d = max(p0 - pos, pos - p1);
     return length(max(vec2(0.0), d)) + min(0.0, max(d.x, d.y));
 }
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -6,35 +6,35 @@ use api::{AlphaType, ClipMode, DeviceInt
 use api::{DeviceUintRect, DeviceUintPoint, ExternalImageType, FilterOp, ImageRendering, LayoutRect};
 use api::{DeviceIntPoint, YuvColorSpace, YuvFormat};
 use api::{LayoutToWorldTransform, WorldPixel};
 use clip::{ClipSource, ClipStore, ClipWorkItem};
 use clip_scroll_tree::{CoordinateSystemId};
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheAddress};
-use gpu_types::{BrushFlags, BrushInstance, ClipChainRectIndex};
-use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, SplitCompositeInstance};
-use gpu_types::{PrimitiveInstance, RasterizationSpace, GlyphInstance, ZBufferId};
-use gpu_types::ZBufferIdGenerator;
+use gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders};
+use gpu_types::{ClipMaskInstance, SplitCompositeInstance};
+use gpu_types::{PrimitiveInstance, RasterizationSpace, GlyphInstance};
+use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentTaskId, DeferredResolve};
 use prim_store::{EdgeAaSegmentMask, ImageSource, PictureIndex, PrimitiveIndex, PrimitiveKind};
 use prim_store::{PrimitiveMetadata, PrimitiveRun, PrimitiveStore, VisibleGradientTile};
 use prim_store::{BorderSource};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind};
 use renderer::{BLOCKS_PER_UV_RECT, ShaderColorMode};
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache};
 use scene::FilterOpHelpers;
 use std::{usize, f32, i32};
 use tiling::{RenderTargetContext};
-use util::{MatrixHelpers, TransformedRectKind};
+use util::{TransformedRectKind};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(0x7fff);
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -450,17 +450,17 @@ impl AlphaBatchBuilder {
     pub fn add_pic_to_batch(
         &mut self,
         pic: &PicturePrimitive,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
-        z_generator: &mut ZBufferIdGenerator,
+        prim_headers: &mut PrimitiveHeaders,
     ) {
         let task_address = render_tasks.get_task_address(task_id);
 
         let task = &render_tasks[task_id];
         let content_origin = match task.kind {
             RenderTaskKind::Picture(ref pic_task) => {
                 pic_task.content_origin
             }
@@ -471,29 +471,29 @@ impl AlphaBatchBuilder {
 
         // Even though most of the time a splitter isn't used or needed,
         // they are cheap to construct so we will always pass one down.
         let mut splitter = BspSplitter::new();
 
         // Add each run in this picture to the batch.
         for run in &pic.runs {
             let scroll_node = &ctx.clip_scroll_tree.nodes[run.clip_and_scroll.scroll_node_id.0];
-            let scroll_id = scroll_node.node_data_index;
+            let transform_id = ctx.transforms.get_id(scroll_node.transform_index);
             self.add_run_to_batch(
                 run,
-                scroll_id,
+                transform_id,
                 ctx,
                 gpu_cache,
                 render_tasks,
                 task_id,
                 task_address,
                 deferred_resolves,
                 &mut splitter,
                 content_origin,
-                z_generator,
+                prim_headers,
             );
         }
 
         // Flush the accumulated plane splits onto the task tree.
         // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
         for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
             let prim_index = PrimitiveIndex(poly.anchor);
             debug!("process sorted poly {:?} {:?}", prim_index, poly.points);
@@ -521,92 +521,88 @@ impl AlphaBatchBuilder {
                 .resolve_render_task_id();
             let source_task_address = render_tasks.get_task_address(source_task_id);
             let gpu_address = gpu_cache.get_address(&gpu_handle);
 
             let instance = SplitCompositeInstance::new(
                 task_address,
                 source_task_address,
                 gpu_address,
-                z_generator.next(),
+                prim_headers.z_generator.next(),
             );
 
             batch.push(PrimitiveInstance::from(instance));
         }
     }
 
     // Helper to add an entire primitive run to a batch list.
     // TODO(gw): Restructure this so the param list isn't quite
     //           so daunting!
     fn add_run_to_batch(
         &mut self,
         run: &PrimitiveRun,
-        scroll_id: ClipScrollNodeIndex,
+        transform_id: TransformPaletteId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         task_id: RenderTaskId,
         task_address: RenderTaskAddress,
         deferred_resolves: &mut Vec<DeferredResolve>,
         splitter: &mut BspSplitter<f64, WorldPixel>,
         content_origin: DeviceIntPoint,
-        z_generator: &mut ZBufferIdGenerator,
+        prim_headers: &mut PrimitiveHeaders,
     ) {
         for i in 0 .. run.count {
             let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
             let metadata = &ctx.prim_store.cpu_metadata[prim_index.0];
 
             if metadata.screen_rect.is_some() {
                 self.add_prim_to_batch(
-                    metadata.clip_chain_rect_index,
-                    scroll_id,
+                    transform_id,
                     prim_index,
                     ctx,
                     gpu_cache,
                     render_tasks,
                     task_id,
                     task_address,
                     deferred_resolves,
                     splitter,
                     content_origin,
-                    z_generator,
+                    prim_headers,
                 );
             }
         }
     }
 
     // Adds a primitive to a batch.
     // It can recursively call itself in some situations, for
     // example if it encounters a picture where the items
     // in that picture are being drawn into the same target.
     fn add_prim_to_batch(
         &mut self,
-        clip_chain_rect_index: ClipChainRectIndex,
-        scroll_id: ClipScrollNodeIndex,
+        transform_id: TransformPaletteId,
         prim_index: PrimitiveIndex,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         task_id: RenderTaskId,
         task_address: RenderTaskAddress,
         deferred_resolves: &mut Vec<DeferredResolve>,
         splitter: &mut BspSplitter<f64, WorldPixel>,
         content_origin: DeviceIntPoint,
-        z_generator: &mut ZBufferIdGenerator,
+        prim_headers: &mut PrimitiveHeaders,
     ) {
-        let z = z_generator.next();
         let prim_metadata = ctx.prim_store.get_metadata(prim_index);
         #[cfg(debug_assertions)] //TODO: why is this needed?
         debug_assert_eq!(prim_metadata.prepared_frame_id, render_tasks.frame_id());
 
-        let scroll_node = &ctx.node_data[scroll_id.0 as usize];
         // TODO(gw): Calculating this for every primitive is a bit
         //           wasteful. We should probably cache this in
         //           the scroll node...
-        let transform_kind = scroll_node.transform.transform_kind();
+        let transform_kind = transform_id.transform_kind();
 
         let screen_rect = prim_metadata.screen_rect.expect("bug");
         let task_relative_bounding_rect = DeviceIntRect::new(
             DeviceIntPoint::new(
                 screen_rect.unclipped.origin.x - content_origin.x,
                 screen_rect.unclipped.origin.y - content_origin.y,
             ),
             screen_rect.unclipped.size,
@@ -643,16 +639,25 @@ impl AlphaBatchBuilder {
         let non_segmented_blend_mode = if !prim_metadata.opacity.is_opaque ||
             prim_metadata.clip_task_id.is_some() ||
             transform_kind == TransformedRectKind::Complex {
             specified_blend_mode
         } else {
             BlendMode::None
         };
 
+        let prim_header = PrimitiveHeader {
+            local_rect: prim_metadata.local_rect,
+            local_clip_rect: prim_metadata.combined_local_clip_rect,
+            task_address,
+            specific_prim_address: prim_cache_address,
+            clip_task_address,
+            transform_id,
+        };
+
         match prim_metadata.prim_kind {
             PrimitiveKind::Brush => {
                 let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
 
                 match brush.kind {
                     BrushKind::Picture { pic_index, .. } => {
                         let picture =
                             &ctx.prim_store.pictures[pic_index.0];
@@ -696,43 +701,39 @@ impl AlphaBatchBuilder {
                                                         gpu_cache,
                                                     );
                                                 let key = BatchKey::new(
                                                     kind,
                                                     non_segmented_blend_mode,
                                                     textures,
                                                 );
                                                 let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+                                                let prim_header_index = prim_headers.push(&prim_header, [
+                                                    uv_rect_address.as_int(),
+                                                    (ShaderColorMode::ColorBitmap as i32) << 16 |
+                                                    RasterizationSpace::Screen as i32,
+                                                    0,
+                                                ]);
 
                                                 let instance = BrushInstance {
-                                                    picture_address: task_address,
-                                                    prim_address: prim_cache_address,
-                                                    clip_chain_rect_index,
-                                                    scroll_id,
-                                                    clip_task_address,
-                                                    z,
+                                                    prim_header_index,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
-                                                    user_data: [
-                                                        uv_rect_address.as_int(),
-                                                        (ShaderColorMode::ColorBitmap as i32) << 16 |
-                                                        RasterizationSpace::Screen as i32,
-                                                        0,
-                                                    ],
+                                                    clip_task_address,
                                                 };
                                                 batch.push(PrimitiveInstance::from(instance));
                                                 false
                                             }
                                             None => {
                                                 true
                                             }
                                         }
                                     }
-                                    FilterOp::DropShadow(..) => {
+                                    FilterOp::DropShadow(offset, ..) => {
                                         // Draw an instance of the shadow first, following by the content.
 
                                         // Both the shadow and the content get drawn as a brush image.
                                         if let Some(ref surface) = picture.surface {
                                             let kind = BatchKind::Brush(
                                                 BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
                                             );
 
@@ -763,43 +764,54 @@ impl AlphaBatchBuilder {
                                                 .as_int();
                                             let content_uv_rect_address = render_tasks[secondary_id]
                                                 .get_texture_address(gpu_cache)
                                                 .as_int();
 
                                             // Get the GPU cache address of the extra data handle.
                                             let shadow_prim_address = gpu_cache.get_address(&picture.extra_gpu_data_handle);
 
+                                            let content_prim_header_index = prim_headers.push(&prim_header, [
+                                                content_uv_rect_address,
+                                                (ShaderColorMode::ColorBitmap as i32) << 16 |
+                                                RasterizationSpace::Screen as i32,
+                                                0,
+                                            ]);
+
+                                            let shadow_rect = prim_metadata.local_rect.translate(&offset);
+                                            let shadow_clip_rect = prim_metadata.local_clip_rect.translate(&offset);
+
+                                            let shadow_prim_header = PrimitiveHeader {
+                                                local_rect: shadow_rect,
+                                                local_clip_rect: shadow_clip_rect,
+                                                specific_prim_address: shadow_prim_address,
+                                                ..prim_header
+                                            };
+
+                                            let shadow_prim_header_index = prim_headers.push(&shadow_prim_header, [
+                                                shadow_uv_rect_address,
+                                                (ShaderColorMode::Alpha as i32) << 16 |
+                                                RasterizationSpace::Screen as i32,
+                                                0,
+                                            ]);
+
                                             let shadow_instance = BrushInstance {
-                                                picture_address: task_address,
-                                                prim_address: shadow_prim_address,
-                                                clip_chain_rect_index,
-                                                scroll_id,
+                                                prim_header_index: shadow_prim_header_index,
                                                 clip_task_address,
-                                                z,
                                                 segment_index: 0,
                                                 edge_flags: EdgeAaSegmentMask::empty(),
                                                 brush_flags: BrushFlags::empty(),
-                                                user_data: [
-                                                    shadow_uv_rect_address,
-                                                    (ShaderColorMode::Alpha as i32) << 16 |
-                                                    RasterizationSpace::Screen as i32,
-                                                    0,
-                                                ],
                                             };
 
                                             let content_instance = BrushInstance {
-                                                prim_address: prim_cache_address,
-                                                user_data: [
-                                                    content_uv_rect_address,
-                                                    (ShaderColorMode::ColorBitmap as i32) << 16 |
-                                                    RasterizationSpace::Screen as i32,
-                                                    0,
-                                                ],
-                                                ..shadow_instance
+                                                prim_header_index: content_prim_header_index,
+                                                clip_task_address,
+                                                segment_index: 0,
+                                                edge_flags: EdgeAaSegmentMask::empty(),
+                                                brush_flags: BrushFlags::empty(),
                                             };
 
                                             self.batch_list
                                                 .get_suitable_batch(shadow_key, &task_relative_bounding_rect)
                                                 .push(PrimitiveInstance::from(shadow_instance));
 
                                             self.batch_list
                                                 .get_suitable_batch(content_key, &task_relative_bounding_rect)
@@ -851,32 +863,28 @@ impl AlphaBatchBuilder {
                                                     }
                                                     FilterOp::ColorMatrix(_) => {
                                                         picture.extra_gpu_data_handle.as_int(gpu_cache)
                                                     }
                                                 };
 
                                                 let cache_task_id = surface.resolve_render_task_id();
                                                 let cache_task_address = render_tasks.get_task_address(cache_task_id);
+                                                let prim_header_index = prim_headers.push(&prim_header, [
+                                                    cache_task_address.0 as i32,
+                                                    filter_mode,
+                                                    user_data,
+                                                ]);
 
                                                 let instance = BrushInstance {
-                                                    picture_address: task_address,
-                                                    prim_address: prim_cache_address,
-                                                    clip_chain_rect_index,
-                                                    scroll_id,
+                                                    prim_header_index,
                                                     clip_task_address,
-                                                    z,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
-                                                    user_data: [
-                                                        cache_task_address.0 as i32,
-                                                        filter_mode,
-                                                        user_data,
-                                                    ],
                                                 };
 
                                                 let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
                                                 batch.push(PrimitiveInstance::from(instance));
                                                 false
                                             }
                                             None => {
                                                 true
@@ -902,32 +910,28 @@ impl AlphaBatchBuilder {
                                         },
                                     ),
                                     BlendMode::PremultipliedAlpha,
                                     BatchTextures::no_texture(),
                                 );
                                 let batch = self.batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
                                 let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
                                 let source_task_address = render_tasks.get_task_address(cache_task_id);
+                                let prim_header_index = prim_headers.push(&prim_header, [
+                                    mode as u32 as i32,
+                                    backdrop_task_address.0 as i32,
+                                    source_task_address.0 as i32,
+                                ]);
 
                                 let instance = BrushInstance {
-                                    picture_address: task_address,
-                                    prim_address: prim_cache_address,
-                                    clip_chain_rect_index,
-                                    scroll_id,
+                                    prim_header_index,
                                     clip_task_address,
-                                    z,
                                     segment_index: 0,
                                     edge_flags: EdgeAaSegmentMask::empty(),
                                     brush_flags: BrushFlags::empty(),
-                                    user_data: [
-                                        mode as u32 as i32,
-                                        backdrop_task_address.0 as i32,
-                                        source_task_address.0 as i32,
-                                    ],
                                 };
 
                                 batch.push(PrimitiveInstance::from(instance));
                                 false
                             }
                             Some(PictureCompositeMode::Blit) => {
                                 let cache_task_id = picture
                                     .surface
@@ -945,33 +949,29 @@ impl AlphaBatchBuilder {
                                 let batch = self.batch_list.get_suitable_batch(
                                     key,
                                     &task_relative_bounding_rect
                                 );
 
                                 let uv_rect_address = render_tasks[cache_task_id]
                                     .get_texture_address(gpu_cache)
                                     .as_int();
+                                let prim_header_index = prim_headers.push(&prim_header, [
+                                    uv_rect_address,
+                                    (ShaderColorMode::ColorBitmap as i32) << 16 |
+                                    RasterizationSpace::Screen as i32,
+                                    0,
+                                ]);
 
                                 let instance = BrushInstance {
-                                    picture_address: task_address,
-                                    prim_address: prim_cache_address,
-                                    clip_chain_rect_index,
-                                    scroll_id,
+                                    prim_header_index,
                                     clip_task_address,
-                                    z,
                                     segment_index: 0,
                                     edge_flags: EdgeAaSegmentMask::empty(),
                                     brush_flags: BrushFlags::empty(),
-                                    user_data: [
-                                        uv_rect_address,
-                                        (ShaderColorMode::ColorBitmap as i32) << 16 |
-                                        RasterizationSpace::Screen as i32,
-                                        0,
-                                    ],
                                 };
                                 batch.push(PrimitiveInstance::from(instance));
                                 false
                             }
                             None => {
                                 true
                             }
                         };
@@ -981,121 +981,113 @@ impl AlphaBatchBuilder {
                         if add_to_parent_pic {
                             self.add_pic_to_batch(
                                 picture,
                                 task_id,
                                 ctx,
                                 gpu_cache,
                                 render_tasks,
                                 deferred_resolves,
-                                z_generator,
+                                prim_headers,
                             );
                         }
                     }
                     BrushKind::Image { request, ref visible_tiles, .. } if !visible_tiles.is_empty() => {
                         for tile in visible_tiles {
                             if let Some((batch_kind, textures, user_data)) = get_image_tile_params(
                                     ctx.resource_cache,
                                     gpu_cache,
                                     deferred_resolves,
                                     request.with_tile(tile.tile_offset),
                             ) {
                                 let prim_cache_address = gpu_cache.get_address(&tile.handle);
+                                let prim_header = PrimitiveHeader {
+                                    specific_prim_address: prim_cache_address,
+                                    local_rect: tile.local_rect,
+                                    local_clip_rect: tile.local_clip_rect,
+                                    ..prim_header
+                                };
+                                let prim_header_index = prim_headers.push(&prim_header, user_data);
+
                                 self.add_image_tile_to_batch(
                                     batch_kind,
                                     specified_blend_mode,
                                     textures,
-                                    clip_chain_rect_index,
+                                    prim_header_index,
                                     clip_task_address,
                                     &task_relative_bounding_rect,
-                                    prim_cache_address,
-                                    scroll_id,
-                                    task_address,
-                                    z,
-                                    user_data,
                                     tile.edge_flags
                                 );
                             }
                         }
                     }
                     BrushKind::LinearGradient { ref stops_handle, ref visible_tiles, .. } if !visible_tiles.is_empty() => {
                         add_gradient_tiles(
                             visible_tiles,
                             stops_handle,
                             BrushBatchKind::LinearGradient,
                             specified_blend_mode,
                             &task_relative_bounding_rect,
-                            clip_chain_rect_index,
-                            scroll_id,
-                            task_address,
                             clip_task_address,
-                            z,
                             gpu_cache,
                             &mut self.batch_list,
+                            &prim_header,
+                            prim_headers,
                         );
                     }
                     BrushKind::RadialGradient { ref stops_handle, ref visible_tiles, .. } if !visible_tiles.is_empty() => {
                         add_gradient_tiles(
                             visible_tiles,
                             stops_handle,
                             BrushBatchKind::RadialGradient,
                             specified_blend_mode,
                             &task_relative_bounding_rect,
-                            clip_chain_rect_index,
-                            scroll_id,
-                            task_address,
                             clip_task_address,
-                            z,
                             gpu_cache,
                             &mut self.batch_list,
+                            &prim_header,
+                            prim_headers,
                         );
                     }
                     _ => {
                         if let Some((batch_kind, textures, user_data)) = brush.get_batch_params(
                                 ctx.resource_cache,
                                 gpu_cache,
                                 deferred_resolves,
                         ) {
+                            let prim_header_index = prim_headers.push(&prim_header, user_data);
+
                             self.add_brush_to_batch(
                                 brush,
                                 prim_metadata,
                                 batch_kind,
                                 specified_blend_mode,
                                 non_segmented_blend_mode,
                                 textures,
-                                clip_chain_rect_index,
+                                prim_header_index,
                                 clip_task_address,
                                 &task_relative_bounding_rect,
-                                prim_cache_address,
-                                scroll_id,
-                                task_address,
                                 transform_kind,
-                                z,
                                 render_tasks,
-                                user_data,
                             );
                         }
                     }
                 }
             }
             PrimitiveKind::TextRun => {
                 let text_cpu =
                     &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
 
-                let font = text_cpu.get_font(
-                    ctx.device_pixel_scale,
-                    scroll_node.transform,
-                );
-                let subpx_dir = font.get_subpx_dir();
+                let subpx_dir = text_cpu.used_font.get_subpx_dir();
 
                 let glyph_fetch_buffer = &mut self.glyph_fetch_buffer;
                 let batch_list = &mut self.batch_list;
 
                 ctx.resource_cache.fetch_glyphs(
-                    font,
+                    text_cpu.used_font.clone(),
                     &text_cpu.glyph_keys,
                     glyph_fetch_buffer,
                     gpu_cache,
                     |texture_id, mut glyph_format, glyphs| {
                         debug_assert_ne!(texture_id, SourceTexture::Invalid);
 
                         // Ignore color and only sample alpha when shadowing.
                         if text_cpu.shadow {
@@ -1112,29 +1104,29 @@ impl AlphaBatchBuilder {
                             ],
                         };
 
                         let kind = BatchKind::TextRun(glyph_format);
 
                         let (blend_mode, color_mode) = match glyph_format {
                             GlyphFormat::Subpixel |
                             GlyphFormat::TransformedSubpixel => {
-                                if text_cpu.font.bg_color.a != 0 {
+                                if text_cpu.used_font.bg_color.a != 0 {
                                     (
                                         BlendMode::SubpixelWithBgColor,
                                         ShaderColorMode::FromRenderPassMode,
                                     )
                                 } else if ctx.use_dual_source_blending {
                                     (
                                         BlendMode::SubpixelDualSource,
                                         ShaderColorMode::SubpixelDualSource,
                                     )
                                 } else {
                                     (
-                                        BlendMode::SubpixelConstantTextColor(text_cpu.font.color.into()),
+                                        BlendMode::SubpixelConstantTextColor(text_cpu.used_font.color.into()),
                                         ShaderColorMode::SubpixelConstantTextColor,
                                     )
                                 }
                             }
                             GlyphFormat::Alpha |
                             GlyphFormat::TransformedAlpha => {
                                 (
                                     BlendMode::PremultipliedAlpha,
@@ -1150,25 +1142,21 @@ impl AlphaBatchBuilder {
                             GlyphFormat::ColorBitmap => {
                                 (
                                     BlendMode::PremultipliedAlpha,
                                     ShaderColorMode::ColorBitmap,
                                 )
                             }
                         };
 
+                        let prim_header_index = prim_headers.push(&prim_header, [0; 3]);
                         let key = BatchKey::new(kind, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
                         let base_instance = GlyphInstance::new(
-                            prim_cache_address,
-                            task_address,
-                            clip_task_address,
-                            clip_chain_rect_index,
-                            scroll_id,
-                            z,
+                            prim_header_index,
                         );
 
                         for glyph in glyphs {
                             batch.push(base_instance.build(
                                 glyph.index_in_text_run,
                                 glyph.uv_rect_address.as_int(),
                                 (subpx_dir as u32 as i32) << 16 |
                                 (color_mode as u32 as i32),
@@ -1180,37 +1168,27 @@ impl AlphaBatchBuilder {
         }
     }
 
     fn add_image_tile_to_batch(
         &mut self,
         batch_kind: BrushBatchKind,
         blend_mode: BlendMode,
         textures: BatchTextures,
-        clip_chain_rect_index: ClipChainRectIndex,
+        prim_header_index: PrimitiveHeaderIndex,
         clip_task_address: RenderTaskAddress,
         task_relative_bounding_rect: &DeviceIntRect,
-        prim_cache_address: GpuCacheAddress,
-        scroll_id: ClipScrollNodeIndex,
-        task_address: RenderTaskAddress,
-        z: ZBufferId,
-        user_data: [i32; 3],
         edge_flags: EdgeAaSegmentMask,
     ) {
         let base_instance = BrushInstance {
-            picture_address: task_address,
-            prim_address: prim_cache_address,
-            clip_chain_rect_index,
-            scroll_id,
+            prim_header_index,
             clip_task_address,
-            z,
             segment_index: 0,
             edge_flags,
             brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
-            user_data,
         };
 
         self.batch_list.add_bounding_rect(task_relative_bounding_rect);
 
         let batch_key = BatchKey {
             blend_mode,
             kind: BatchKind::Brush(batch_kind),
             textures,
@@ -1222,38 +1200,28 @@ impl AlphaBatchBuilder {
     fn add_brush_to_batch(
         &mut self,
         brush: &BrushPrimitive,
         prim_metadata: &PrimitiveMetadata,
         batch_kind: BrushBatchKind,
         alpha_blend_mode: BlendMode,
         non_segmented_blend_mode: BlendMode,
         textures: BatchTextures,
-        clip_chain_rect_index: ClipChainRectIndex,
+        prim_header_index: PrimitiveHeaderIndex,
         clip_task_address: RenderTaskAddress,
         task_relative_bounding_rect: &DeviceIntRect,
-        prim_cache_address: GpuCacheAddress,
-        scroll_id: ClipScrollNodeIndex,
-        task_address: RenderTaskAddress,
         transform_kind: TransformedRectKind,
-        z: ZBufferId,
         render_tasks: &RenderTaskTree,
-        user_data: [i32; 3],
     ) {
         let base_instance = BrushInstance {
-            picture_address: task_address,
-            prim_address: prim_cache_address,
-            clip_chain_rect_index,
-            scroll_id,
+            prim_header_index,
             clip_task_address,
-            z,
             segment_index: 0,
             edge_flags: EdgeAaSegmentMask::all(),
             brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
-            user_data,
         };
 
         self.batch_list.add_bounding_rect(task_relative_bounding_rect);
 
         match brush.segment_desc {
             Some(ref segment_desc) => {
                 let alpha_batch_key = BatchKey {
                     blend_mode: alpha_blend_mode,
@@ -1321,54 +1289,50 @@ impl AlphaBatchBuilder {
 }
 
 fn add_gradient_tiles(
     visible_tiles: &[VisibleGradientTile],
     stops_handle: &GpuCacheHandle,
     kind: BrushBatchKind,
     blend_mode: BlendMode,
     task_relative_bounding_rect: &DeviceIntRect,
-    clip_chain_rect_index: ClipChainRectIndex,
-    scroll_id: ClipScrollNodeIndex,
-    task_address: RenderTaskAddress,
     clip_task_address: RenderTaskAddress,
-    z: ZBufferId,
     gpu_cache: &GpuCache,
     batch_list: &mut BatchList,
+    base_prim_header: &PrimitiveHeader,
+    prim_headers: &mut PrimitiveHeaders,
 ) {
     batch_list.add_bounding_rect(task_relative_bounding_rect);
     let batch = batch_list.get_suitable_batch(
         BatchKey {
             blend_mode: blend_mode,
             kind: BatchKind::Brush(kind),
             textures: BatchTextures::no_texture(),
         },
         task_relative_bounding_rect
     );
 
     let user_data = [stops_handle.as_int(gpu_cache), 0, 0];
 
-    let base_instance = BrushInstance {
-        picture_address: task_address,
-        prim_address: GpuCacheAddress::invalid(),
-        clip_chain_rect_index,
-        scroll_id,
-        clip_task_address,
-        z,
-        segment_index: 0,
-        edge_flags: EdgeAaSegmentMask::all(),
-        brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
-        user_data,
-    };
+    for tile in visible_tiles {
+        let prim_header = PrimitiveHeader {
+            specific_prim_address: gpu_cache.get_address(&tile.handle),
+            local_rect: tile.local_rect,
+            local_clip_rect: tile.local_clip_rect,
+            ..*base_prim_header
+        };
+        let prim_header_index = prim_headers.push(&prim_header, user_data);
 
-    for tile in visible_tiles {
         batch.push(PrimitiveInstance::from(
             BrushInstance {
-                prim_address: gpu_cache.get_address(&tile.handle),
-                ..base_instance
+                prim_header_index,
+                clip_task_address,
+                segment_index: 0,
+                edge_flags: EdgeAaSegmentMask::all(),
+                brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
             }
         ));
     }
 }
 
 fn get_image_tile_params(
     resource_cache: &ResourceCache,
     gpu_cache: &mut GpuCache,
@@ -1778,39 +1742,40 @@ impl ClipBatcher {
 
     pub fn add_clip_region(
         &mut self,
         task_address: RenderTaskAddress,
         clip_data_address: GpuCacheAddress,
     ) {
         let instance = ClipMaskInstance {
             render_task_address: task_address,
-            scroll_node_data_index: ClipScrollNodeIndex(0),
+            transform_id: TransformPaletteId::identity(),
             segment: 0,
             clip_data_address,
             resource_address: GpuCacheAddress::invalid(),
         };
 
         self.rectangles.push(instance);
     }
 
     pub fn add(
         &mut self,
         task_address: RenderTaskAddress,
         clips: &[ClipWorkItem],
         coordinate_system_id: CoordinateSystemId,
         resource_cache: &ResourceCache,
         gpu_cache: &GpuCache,
         clip_store: &ClipStore,
+        transforms: &TransformPalette,
     ) {
         let mut coordinate_system_id = coordinate_system_id;
         for work_item in clips.iter() {
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
-                scroll_node_data_index: work_item.scroll_node_data_index,
+                transform_id: transforms.get_id(work_item.transform_index),
                 segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
             };
             let info = clip_store
                 .get_opt(&work_item.clip_sources)
                 .expect("bug: clip handle should be valid");
 
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -2,21 +2,21 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipMode, ComplexClipRegion, DeviceIntRect, DevicePixelScale, ImageMask};
 use api::{ImageRendering, LayoutRect, LayoutSize, LayoutPoint, LayoutVector2D, LocalClip};
 use api::{BoxShadowClipMode, LayoutToWorldScale, LineOrientation, LineStyle};
 use border::{ensure_no_corner_overlap};
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
-use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId};
+use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId, TransformIndex};
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
-use gpu_types::{BoxShadowStretchMode, ClipScrollNodeIndex};
+use gpu_types::{BoxShadowStretchMode};
 use prim_store::{ClipData, ImageMaskData};
 use render_task::to_cache_size;
 use resource_cache::{ImageRequest, ResourceCache};
 use util::{LayoutToWorldFastTransform, MaxRect, calculate_screen_bounding_rect};
 use util::{extract_inner_rect_safe, pack_as_float};
 use std::sync::Arc;
 
 #[derive(Debug)]
@@ -620,13 +620,13 @@ impl Iterator for ClipChainNodeIter {
         previous
     }
 }
 
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ClipWorkItem {
-    pub scroll_node_data_index: ClipScrollNodeIndex,
+    pub transform_index: TransformIndex,
     pub clip_sources: ClipSourcesWeakHandle,
     pub coordinate_system_id: CoordinateSystemId,
 }
 
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -2,20 +2,20 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DevicePixelScale, ExternalScrollId, LayoutPixel, LayoutPoint, LayoutRect, LayoutSize};
 use api::{LayoutVector2D, LayoutTransform, PipelineId, PropertyBinding};
 use api::{ScrollClamping, ScrollLocation, ScrollSensitivity, StickyOffsetBounds};
 use clip::{ClipChain, ClipChainNode, ClipSourcesHandle, ClipStore, ClipWorkItem};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, CoordinateSystemId};
-use clip_scroll_tree::TransformUpdateState;
+use clip_scroll_tree::{TransformUpdateState, TransformIndex};
 use euclid::SideOffsets2D;
 use gpu_cache::GpuCache;
-use gpu_types::{ClipScrollNodeIndex as GPUClipScrollNodeIndex, ClipScrollNodeData};
+use gpu_types::{TransformData, TransformPalette};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use util::{LayoutToWorldFastTransform, LayoutFastTransform};
 use util::{TransformedRectKind};
 
 #[derive(Debug)]
 pub struct StickyFrameInfo {
     pub frame_rect: LayoutRect,
@@ -41,51 +41,58 @@ impl StickyFrameInfo {
             horizontal_offset_bounds,
             previously_applied_offset,
             current_offset: LayoutVector2D::zero(),
         }
     }
 }
 
 #[derive(Debug)]
-pub enum NodeType {
+pub enum SpatialNodeKind {
+    /// A special kind of node that adjusts its position based on the position
+    /// of its parent node and a given set of sticky positioning offset bounds.
+    /// Sticky positioning is described in the CSS Positioned Layout Module Level 3 here:
+    /// https://www.w3.org/TR/css-position-3/#sticky-pos
+    StickyFrame(StickyFrameInfo),
+
+    /// Transforms its content, but doesn't clip it. Can also be adjusted
+    /// by scroll events or setting scroll offsets.
+    ScrollFrame(ScrollFrameInfo),
+
     /// A reference frame establishes a new coordinate space in the tree.
     ReferenceFrame(ReferenceFrameInfo),
+}
+
+#[derive(Debug)]
+pub enum NodeType {
+    Spatial {
+        kind: SpatialNodeKind,
+    },
 
     /// Other nodes just do clipping, but no transformation.
     Clip {
         handle: ClipSourcesHandle,
         clip_chain_index: ClipChainIndex,
 
         /// A copy of the ClipChainNode this node would produce. We need to keep a copy,
         /// because the ClipChain may not contain our node if is optimized out, but API
         /// defined ClipChains will still need to access it.
         clip_chain_node: Option<ClipChainNode>,
     },
 
-    /// Transforms it's content, but doesn't clip it. Can also be adjusted
-    /// by scroll events or setting scroll offsets.
-    ScrollFrame(ScrollFrameInfo),
-
-    /// A special kind of node that adjusts its position based on the position
-    /// of its parent node and a given set of sticky positioning offset bounds.
-    /// Sticky positioned is described in the CSS Positioned Layout Module Level 3 here:
-    /// https://www.w3.org/TR/css-position-3/#sticky-pos
-    StickyFrame(StickyFrameInfo),
-
     /// An empty node, used to pad the ClipScrollTree's array of nodes so that
     /// we can immediately use each assigned ClipScrollNodeIndex. After display
     /// list flattening this node type should never be used.
     Empty,
 }
 
 impl NodeType {
     fn is_reference_frame(&self) -> bool {
         match *self {
-            NodeType::ReferenceFrame(_) => true,
+            NodeType::Spatial { kind: SpatialNodeKind::ReferenceFrame(_), .. } => true,
             _ => false,
         }
     }
 }
 
 /// Contains information common among all types of ClipScrollTree nodes.
 #[derive(Debug)]
 pub struct ClipScrollNode {
@@ -121,120 +128,151 @@ pub struct ClipScrollNode {
     /// The axis-aligned coordinate system id of this node.
     pub coordinate_system_id: CoordinateSystemId,
 
     /// The transformation from the coordinate system which established our compatible coordinate
     /// system (same coordinate system id) and us. This can change via scroll offsets and via new
     /// reference frame transforms.
     pub coordinate_system_relative_transform: LayoutFastTransform,
 
-    /// A linear ID / index of this clip-scroll node. Used as a reference to
-    /// pass to shaders, to allow them to fetch a given clip-scroll node.
-    pub node_data_index: GPUClipScrollNodeIndex,
+    /// The index of the spatial node that provides positioning information for this node.
+    /// For reference frames, scroll and sticky frames it is a unique identifier.
+    /// For clip nodes, this is the nearest ancestor spatial node.
+    pub transform_index: TransformIndex,
 }
 
 impl ClipScrollNode {
     pub fn new(
         pipeline_id: PipelineId,
         parent_index: Option<ClipScrollNodeIndex>,
-        node_type: NodeType
+        node_type: NodeType,
+        transform_index: TransformIndex,
     ) -> Self {
         ClipScrollNode {
             world_viewport_transform: LayoutToWorldFastTransform::identity(),
             world_content_transform: LayoutToWorldFastTransform::identity(),
             transform_kind: TransformedRectKind::AxisAligned,
             parent: parent_index,
             children: Vec::new(),
             pipeline_id,
             node_type,
             invertible: true,
             coordinate_system_id: CoordinateSystemId(0),
             coordinate_system_relative_transform: LayoutFastTransform::identity(),
-            node_data_index: GPUClipScrollNodeIndex(0),
+            transform_index,
         }
     }
 
     pub fn empty() -> ClipScrollNode {
-        Self::new(PipelineId::dummy(), None, NodeType::Empty)
+        Self::new(
+            PipelineId::dummy(),
+            None,
+            NodeType::Empty,
+            TransformIndex(0),
+        )
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_index: ClipScrollNodeIndex,
         external_id: Option<ExternalScrollId>,
         frame_rect: &LayoutRect,
         content_size: &LayoutSize,
         scroll_sensitivity: ScrollSensitivity,
+        transform_index: TransformIndex,
     ) -> Self {
-        let node_type = NodeType::ScrollFrame(ScrollFrameInfo::new(
-            *frame_rect,
-            scroll_sensitivity,
-            LayoutSize::new(
-                (content_size.width - frame_rect.size.width).max(0.0),
-                (content_size.height - frame_rect.size.height).max(0.0)
-            ),
-            external_id,
-        ));
+        let node_type = NodeType::Spatial {
+            kind: SpatialNodeKind::ScrollFrame(ScrollFrameInfo::new(
+                *frame_rect,
+                scroll_sensitivity,
+                LayoutSize::new(
+                    (content_size.width - frame_rect.size.width).max(0.0),
+                    (content_size.height - frame_rect.size.height).max(0.0)
+                ),
+                external_id,
+            )),
+        };
 
-        Self::new(pipeline_id, Some(parent_index), node_type)
+        Self::new(
+            pipeline_id,
+            Some(parent_index),
+            node_type,
+            transform_index,
+        )
     }
 
     pub fn new_reference_frame(
         parent_index: Option<ClipScrollNodeIndex>,
         source_transform: Option<PropertyBinding<LayoutTransform>>,
         source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayoutVector2D,
         pipeline_id: PipelineId,
+        transform_index: TransformIndex,
     ) -> Self {
         let identity = LayoutTransform::identity();
         let source_perspective = source_perspective.map_or_else(
             LayoutFastTransform::identity, |perspective| perspective.into());
         let info = ReferenceFrameInfo {
             resolved_transform: LayoutFastTransform::identity(),
             source_transform: source_transform.unwrap_or(PropertyBinding::Value(identity)),
             source_perspective,
             origin_in_parent_reference_frame,
             invertible: true,
         };
-        Self::new(pipeline_id, parent_index, NodeType::ReferenceFrame(info))
+        Self::new(
+            pipeline_id,
+            parent_index,
+            NodeType::Spatial {
+                kind: SpatialNodeKind::ReferenceFrame(info),
+            },
+            transform_index,
+        )
     }
 
     pub fn new_sticky_frame(
         parent_index: ClipScrollNodeIndex,
         sticky_frame_info: StickyFrameInfo,
         pipeline_id: PipelineId,
+        transform_index: TransformIndex,
     ) -> Self {
-        let node_type = NodeType::StickyFrame(sticky_frame_info);
-        Self::new(pipeline_id, Some(parent_index), node_type)
+        let node_type = NodeType::Spatial {
+            kind: SpatialNodeKind::StickyFrame(sticky_frame_info),
+        };
+        Self::new(
+            pipeline_id,
+            Some(parent_index),
+            node_type,
+            transform_index,
+        )
     }
 
 
     pub fn add_child(&mut self, child: ClipScrollNodeIndex) {
         self.children.push(child);
     }
 
     pub fn apply_old_scrolling_state(&mut self, old_scroll_info: &ScrollFrameInfo) {
         match self.node_type {
-            NodeType::ScrollFrame(ref mut scrolling) => {
+            NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(ref mut scrolling), .. } => {
                 *scrolling = scrolling.combine_with_old_scroll_info(old_scroll_info);
             }
             _ if old_scroll_info.offset != LayoutVector2D::zero() => {
                 warn!("Tried to scroll a non-scroll node.")
             }
             _ => {}
         }
     }
 
     pub fn set_scroll_origin(&mut self, origin: &LayoutPoint, clamp: ScrollClamping) -> bool {
         let scrollable_size = self.scrollable_size();
         let scrollable_width = scrollable_size.width;
         let scrollable_height = scrollable_size.height;
 
         let scrolling = match self.node_type {
-            NodeType::ScrollFrame(ref mut scrolling) => scrolling,
+            NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(ref mut scrolling), .. } => scrolling,
             _ => {
                 warn!("Tried to scroll a non-scroll node.");
                 return false;
             }
         };
 
         let new_offset = match clamp {
             ScrollClamping::ToContentBounds => {
@@ -260,39 +298,42 @@ impl ClipScrollNode {
     }
 
     pub fn mark_uninvertible(&mut self) {
         self.invertible = false;
         self.world_content_transform = LayoutToWorldFastTransform::identity();
         self.world_viewport_transform = LayoutToWorldFastTransform::identity();
     }
 
-    pub fn push_gpu_node_data(&mut self, node_data: &mut Vec<ClipScrollNodeData>) {
-        if !self.invertible {
-            node_data.push(ClipScrollNodeData::invalid());
-            return;
-        }
-
-        let inv_transform = match self.world_content_transform.inverse() {
-            Some(inverted) => inverted.to_transform(),
-            None => {
-                node_data.push(ClipScrollNodeData::invalid());
+    pub fn push_gpu_data(
+        &mut self,
+        transform_palette: &mut TransformPalette,
+    ) {
+        if let NodeType::Spatial { .. } = self.node_type {
+            if !self.invertible {
+                transform_palette.set(self.transform_index, TransformData::invalid());
                 return;
             }
-        };
+
+            let inv_transform = match self.world_content_transform.inverse() {
+                Some(inverted) => inverted.to_transform(),
+                None => {
+                    transform_palette.set(self.transform_index, TransformData::invalid());
+                    return;
+                }
+            };
 
-        let data = ClipScrollNodeData {
-            transform: self.world_content_transform.into(),
-            inv_transform,
-            transform_kind: self.transform_kind as u32 as f32,
-            padding: [0.0; 3],
-        };
+            let data = TransformData {
+                transform: self.world_content_transform.into(),
+                inv_transform,
+            };
 
-        // Write the data that will be made available to the GPU for this node.
-        node_data.push(data);
+            // Write the data that will be made available to the GPU for this node.
+            transform_palette.set(self.transform_index, data);
+        }
     }
 
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
         device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
@@ -315,17 +356,17 @@ impl ClipScrollNode {
         } else {
             TransformedRectKind::Complex
         };
 
         // If this node is a reference frame, we check if it has a non-invertible matrix.
         // For non-reference-frames we assume that they will produce only additional
         // translations which should be invertible.
         match self.node_type {
-            NodeType::ReferenceFrame(info) if !info.invertible => {
+            NodeType::Spatial { kind: SpatialNodeKind::ReferenceFrame(info), .. } if !info.invertible => {
                 self.mark_uninvertible();
                 return;
             }
             _ => self.invertible = true,
         }
 
         self.update_clip_work_item(
             state,
@@ -342,17 +383,17 @@ impl ClipScrollNode {
         state: &mut TransformUpdateState,
         device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         clip_chains: &mut [ClipChain],
     ) {
         let (clip_sources_handle, clip_chain_index, stored_clip_chain_node) = match self.node_type {
-            NodeType::Clip { ref handle, clip_chain_index, ref mut clip_chain_node } =>
+            NodeType::Clip { ref handle, clip_chain_index, ref mut clip_chain_node, .. } =>
                 (handle, clip_chain_index, clip_chain_node),
             _ => {
                 self.invertible = true;
                 return;
             }
         };
 
         let clip_sources = clip_store.get_mut(clip_sources_handle);
@@ -373,17 +414,17 @@ impl ClipScrollNode {
         // Rectangle ClipSource.
         let screen_outer_rect = screen_outer_rect
             .expect("Clipping node didn't have outer rect.");
         let local_outer_rect = clip_sources.local_outer_rect
             .expect("Clipping node didn't have outer rect.");
 
         let new_node = ClipChainNode {
             work_item: ClipWorkItem {
-                scroll_node_data_index: self.node_data_index,
+                transform_index: self.transform_index,
                 clip_sources: clip_sources_handle.weak(),
                 coordinate_system_id: state.current_coordinate_system_id,
             },
             local_clip_rect:
                 self.coordinate_system_relative_transform.transform_rect(&local_outer_rect),
             screen_outer_rect,
             screen_inner_rect,
             prev: None,
@@ -437,32 +478,31 @@ impl ClipScrollNode {
         } else {
             self.world_viewport_transform
         };
 
         let added_offset = state.parent_accumulated_scroll_offset + sticky_offset + scroll_offset;
         self.coordinate_system_relative_transform =
             state.coordinate_system_relative_transform.offset(added_offset);
 
-        match self.node_type {
-            NodeType::StickyFrame(ref mut info) => info.current_offset = sticky_offset,
-            _ => {},
+        if let NodeType::Spatial { kind: SpatialNodeKind::StickyFrame(ref mut info), .. } = self.node_type {
+            info.current_offset = sticky_offset;
         }
 
         self.coordinate_system_id = state.current_coordinate_system_id;
     }
 
     pub fn update_transform_for_reference_frame(
         &mut self,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
         scene_properties: &SceneProperties,
     ) {
         let info = match self.node_type {
-            NodeType::ReferenceFrame(ref mut info) => info,
+            NodeType::Spatial { kind: SpatialNodeKind::ReferenceFrame(ref mut info), .. } => info,
             _ => unreachable!("Called update_transform_for_reference_frame on non-ReferenceFrame"),
         };
 
         // Resolve the transform against any property bindings.
         let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
         info.resolved_transform =
             LayoutFastTransform::with_vector(info.origin_in_parent_reference_frame)
             .pre_mul(&source_transform.into())
@@ -500,17 +540,17 @@ impl ClipScrollNode {
     }
 
     fn calculate_sticky_offset(
         &self,
         viewport_scroll_offset: &LayoutVector2D,
         viewport_rect: &LayoutRect,
     ) -> LayoutVector2D {
         let info = match self.node_type {
-            NodeType::StickyFrame(ref info) => info,
+            NodeType::Spatial { kind: SpatialNodeKind::StickyFrame(ref info), .. } => info,
             _ => return LayoutVector2D::zero(),
         };
 
         if info.margins.top.is_none() && info.margins.bottom.is_none() &&
             info.margins.left.is_none() && info.margins.right.is_none() {
             return LayoutVector2D::zero();
         }
 
@@ -614,55 +654,58 @@ impl ClipScrollNode {
             return;
         }
 
         // The transformation we are passing is the transformation of the parent
         // reference frame and the offset is the accumulated offset of all the nodes
         // between us and the parent reference frame. If we are a reference frame,
         // we need to reset both these values.
         match self.node_type {
-            NodeType::ReferenceFrame(ref info) => {
-                state.parent_reference_frame_transform = self.world_viewport_transform;
-                state.parent_accumulated_scroll_offset = LayoutVector2D::zero();
-                state.coordinate_system_relative_transform =
-                    self.coordinate_system_relative_transform.clone();
-                let translation = -info.origin_in_parent_reference_frame;
-                state.nearest_scrolling_ancestor_viewport =
-                    state.nearest_scrolling_ancestor_viewport
-                       .translate(&translation);
+            NodeType::Spatial { ref kind, .. } => {
+                match *kind {
+                    SpatialNodeKind::StickyFrame(ref info) => {
+                        // We don't translate the combined rect by the sticky offset, because sticky
+                        // offsets actually adjust the node position itself, whereas scroll offsets
+                        // only apply to contents inside the node.
+                        state.parent_accumulated_scroll_offset =
+                            info.current_offset + state.parent_accumulated_scroll_offset;
+                    }
+                    SpatialNodeKind::ScrollFrame(ref scrolling) => {
+                        state.parent_accumulated_scroll_offset =
+                            scrolling.offset + state.parent_accumulated_scroll_offset;
+                        state.nearest_scrolling_ancestor_offset = scrolling.offset;
+                        state.nearest_scrolling_ancestor_viewport = scrolling.viewport_rect;
+                    }
+                    SpatialNodeKind::ReferenceFrame(ref info) => {
+                        state.parent_reference_frame_transform = self.world_viewport_transform;
+                        state.parent_accumulated_scroll_offset = LayoutVector2D::zero();
+                        state.coordinate_system_relative_transform =
+                            self.coordinate_system_relative_transform.clone();
+                        let translation = -info.origin_in_parent_reference_frame;
+                        state.nearest_scrolling_ancestor_viewport =
+                            state.nearest_scrolling_ancestor_viewport
+                               .translate(&translation);
+                    }
+                }
             }
             NodeType::Clip{ .. } => { }
-            NodeType::ScrollFrame(ref scrolling) => {
-                state.parent_accumulated_scroll_offset =
-                    scrolling.offset + state.parent_accumulated_scroll_offset;
-                state.nearest_scrolling_ancestor_offset = scrolling.offset;
-                state.nearest_scrolling_ancestor_viewport = scrolling.viewport_rect;
-            }
-            NodeType::StickyFrame(ref info) => {
-                // We don't translate the combined rect by the sticky offset, because sticky
-                // offsets actually adjust the node position itself, whereas scroll offsets
-                // only apply to contents inside the node.
-                state.parent_accumulated_scroll_offset =
-                    info.current_offset + state.parent_accumulated_scroll_offset;
-            }
             NodeType::Empty => unreachable!("Empty node remaining in ClipScrollTree."),
         }
     }
 
     pub fn scrollable_size(&self) -> LayoutSize {
         match self.node_type {
-           NodeType:: ScrollFrame(state) => state.scrollable_size,
+           NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(state), .. } => state.scrollable_size,
             _ => LayoutSize::zero(),
         }
     }
 
-
     pub fn scroll(&mut self, scroll_location: ScrollLocation) -> bool {
         let scrolling = match self.node_type {
-            NodeType::ScrollFrame(ref mut scrolling) => scrolling,
+            NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(ref mut scrolling), .. } => scrolling,
             _ => return false,
         };
 
         let delta = match scroll_location {
             ScrollLocation::Delta(delta) => delta,
             ScrollLocation::Start => {
                 if scrolling.offset.y.round() >= 0.0 {
                     // Nothing to do on this layer.
@@ -702,24 +745,24 @@ impl ClipScrollNode {
                 .round();
         }
 
         scrolling.offset != original_layer_scroll_offset
     }
 
     pub fn scroll_offset(&self) -> LayoutVector2D {
         match self.node_type {
-            NodeType::ScrollFrame(ref scrolling) => scrolling.offset,
+            NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(ref scrolling), .. } => scrolling.offset,
             _ => LayoutVector2D::zero(),
         }
     }
 
     pub fn matches_external_id(&self, external_id: ExternalScrollId) -> bool {
         match self.node_type {
-            NodeType::ScrollFrame(info) if info.external_id == Some(external_id) => true,
+            NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(info), .. } if info.external_id == Some(external_id) => true,
             _ => false,
         }
     }
 }
 
 #[derive(Copy, Clone, Debug)]
 pub struct ScrollFrameInfo {
     /// The rectangle of the viewport of this scroll frame. This is important for
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -1,19 +1,19 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DeviceIntRect, DevicePixelScale, ExternalScrollId, LayoutPoint, LayoutRect, LayoutVector2D};
 use api::{PipelineId, ScrollClamping, ScrollLocation, ScrollNodeState};
-use api::WorldPoint;
+use api::{LayoutSize, LayoutTransform, PropertyBinding, ScrollSensitivity, WorldPoint};
 use clip::{ClipChain, ClipSourcesHandle, ClipStore};
-use clip_scroll_node::{ClipScrollNode, NodeType, ScrollFrameInfo, StickyFrameInfo};
+use clip_scroll_node::{ClipScrollNode, NodeType, SpatialNodeKind, ScrollFrameInfo, StickyFrameInfo};
 use gpu_cache::GpuCache;
-use gpu_types::{ClipScrollNodeIndex as GPUClipScrollNodeIndex, ClipScrollNodeData};
+use gpu_types::TransformPalette;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use util::{LayoutFastTransform, LayoutToWorldFastTransform};
 
 pub type ScrollStates = FastHashMap<ExternalScrollId, ScrollFrameInfo>;
 
@@ -24,16 +24,25 @@ pub type ScrollStates = FastHashMap<Exte
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CoordinateSystemId(pub u32);
 
 #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
 pub struct ClipScrollNodeIndex(pub usize);
 
+// Used to index the smaller subset of nodes in the CST that define
+// new transform / positioning.
+// TODO(gw): In the future if we split the CST into a positioning and
+//           clipping tree, this can be tidied up a bit.
+#[derive(Copy, Debug, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct TransformIndex(pub u32);
+
 const ROOT_REFERENCE_FRAME_INDEX: ClipScrollNodeIndex = ClipScrollNodeIndex(0);
 const TOPMOST_SCROLL_NODE_INDEX: ClipScrollNodeIndex = ClipScrollNodeIndex(1);
 
 impl CoordinateSystemId {
     pub fn root() -> Self {
         CoordinateSystemId(0)
     }
 
@@ -65,24 +74,23 @@ pub struct ClipScrollTree {
     pub clip_chains_descriptors: Vec<ClipChainDescriptor>,
 
     /// A vector of all ClipChains in this ClipScrollTree including those from
     /// ClipChainDescriptors and also those defined by the clipping node hierarchy.
     pub clip_chains: Vec<ClipChain>,
 
     pub pending_scroll_offsets: FastHashMap<ExternalScrollId, (LayoutPoint, ScrollClamping)>,
 
-    /// The current frame id, used for giving a unique id to all new dynamically
-    /// added frames and clips. The ClipScrollTree increments this by one every
-    /// time a new dynamic frame is created.
-    current_new_node_item: u64,
-
     /// A set of pipelines which should be discarded the next time this
     /// tree is drained.
     pub pipelines_to_discard: FastHashSet<PipelineId>,
+
+    /// The number of nodes in the CST that are spatial. Currently, this is all
+    /// nodes that are not clip nodes.
+    spatial_node_count: usize,
 }
 
 #[derive(Clone)]
 pub struct TransformUpdateState {
     pub parent_reference_frame_transform: LayoutToWorldFastTransform,
     pub parent_accumulated_scroll_offset: LayoutVector2D,
     pub nearest_scrolling_ancestor_offset: LayoutVector2D,
     pub nearest_scrolling_ancestor_viewport: LayoutRect,
@@ -107,18 +115,18 @@ pub struct TransformUpdateState {
 
 impl ClipScrollTree {
     pub fn new() -> Self {
         ClipScrollTree {
             nodes: Vec::new(),
             clip_chains_descriptors: Vec::new(),
             clip_chains: vec![ClipChain::empty(&DeviceIntRect::zero())],
             pending_scroll_offsets: FastHashMap::default(),
-            current_new_node_item: 1,
             pipelines_to_discard: FastHashSet::default(),
+            spatial_node_count: 0,
         }
     }
 
     /// The root reference frame, which is the true root of the ClipScrollTree. Initially
     /// this ID is not valid, which is indicated by ```nodes``` being empty.
     pub fn root_reference_frame_index(&self) -> ClipScrollNodeIndex {
         // TODO(mrobinson): We should eventually make this impossible to misuse.
         debug_assert!(!self.nodes.is_empty());
@@ -131,42 +139,41 @@ impl ClipScrollTree {
         // TODO(mrobinson): We should eventually make this impossible to misuse.
         debug_assert!(self.nodes.len() >= 1);
         TOPMOST_SCROLL_NODE_INDEX
     }
 
     pub fn get_scroll_node_state(&self) -> Vec<ScrollNodeState> {
         let mut result = vec![];
         for node in &self.nodes {
-            if let NodeType::ScrollFrame(info) = node.node_type {
+            if let NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(info), .. } = node.node_type {
                 if let Some(id) = info.external_id {
                     result.push(ScrollNodeState { id, scroll_offset: info.offset })
                 }
             }
         }
         result
     }
 
     pub fn drain(&mut self) -> ScrollStates {
-        self.current_new_node_item = 1;
-
         let mut scroll_states = FastHashMap::default();
         for old_node in &mut self.nodes.drain(..) {
             if self.pipelines_to_discard.contains(&old_node.pipeline_id) {
                 continue;
             }
 
             match old_node.node_type {
-                NodeType::ScrollFrame(info) if info.external_id.is_some() => {
+                NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(info), .. } if info.external_id.is_some() => {
                     scroll_states.insert(info.external_id.unwrap(), info);
                 }
                 _ => {}
             }
         }
 
+        self.spatial_node_count = 0;
         self.pipelines_to_discard.clear();
         self.clip_chains = vec![ClipChain::empty(&DeviceIntRect::zero())];
         self.clip_chains_descriptors.clear();
         scroll_states
     }
 
     pub fn scroll_node(
         &mut self,
@@ -190,17 +197,17 @@ impl ClipScrollTree {
     ) -> ClipScrollNodeIndex {
         let index = match index {
             Some(index) => index,
             None => return self.topmost_scroll_node_index(),
         };
 
         let node = &self.nodes[index.0];
         match node.node_type {
-            NodeType::ScrollFrame(state) if state.sensitive_to_input_events() => index,
+            NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(state), .. } if state.sensitive_to_input_events() => index,
             _ => self.find_nearest_scrolling_ancestor(node.parent)
         }
     }
 
     pub fn scroll_nearest_scrolling_ancestor(
         &mut self,
         scroll_location: ScrollLocation,
         node_index: Option<ClipScrollNodeIndex>,
@@ -215,88 +222,86 @@ impl ClipScrollTree {
     pub fn update_tree(
         &mut self,
         screen_rect: &DeviceIntRect,
         device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pan: WorldPoint,
-        node_data: &mut Vec<ClipScrollNodeData>,
         scene_properties: &SceneProperties,
-    ) {
-        if self.nodes.is_empty() {
-            return;
+    ) -> TransformPalette {
+        let mut transform_palette = TransformPalette::new(self.spatial_node_count);
+
+        if !self.nodes.is_empty() {
+            self.clip_chains[0] = ClipChain::empty(screen_rect);
+
+            let root_reference_frame_index = self.root_reference_frame_index();
+            let mut state = TransformUpdateState {
+                parent_reference_frame_transform: LayoutVector2D::new(pan.x, pan.y).into(),
+                parent_accumulated_scroll_offset: LayoutVector2D::zero(),
+                nearest_scrolling_ancestor_offset: LayoutVector2D::zero(),
+                nearest_scrolling_ancestor_viewport: LayoutRect::zero(),
+                parent_clip_chain_index: ClipChainIndex(0),
+                current_coordinate_system_id: CoordinateSystemId::root(),
+                coordinate_system_relative_transform: LayoutFastTransform::identity(),
+                invertible: true,
+            };
+            let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
+            self.update_node(
+                root_reference_frame_index,
+                &mut state,
+                &mut next_coordinate_system_id,
+                device_pixel_scale,
+                clip_store,
+                resource_cache,
+                gpu_cache,
+                &mut transform_palette,
+                scene_properties,
+            );
+
+            self.build_clip_chains(screen_rect);
         }
 
-        self.clip_chains[0] = ClipChain::empty(screen_rect);
-
-        let root_reference_frame_index = self.root_reference_frame_index();
-        let mut state = TransformUpdateState {
-            parent_reference_frame_transform: LayoutVector2D::new(pan.x, pan.y).into(),
-            parent_accumulated_scroll_offset: LayoutVector2D::zero(),
-            nearest_scrolling_ancestor_offset: LayoutVector2D::zero(),
-            nearest_scrolling_ancestor_viewport: LayoutRect::zero(),
-            parent_clip_chain_index: ClipChainIndex(0),
-            current_coordinate_system_id: CoordinateSystemId::root(),
-            coordinate_system_relative_transform: LayoutFastTransform::identity(),
-            invertible: true,
-        };
-        let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
-        self.update_node(
-            root_reference_frame_index,
-            &mut state,
-            &mut next_coordinate_system_id,
-            device_pixel_scale,
-            clip_store,
-            resource_cache,
-            gpu_cache,
-            node_data,
-            scene_properties,
-        );
-
-        self.build_clip_chains(screen_rect);
+        transform_palette
     }
 
     fn update_node(
         &mut self,
         node_index: ClipScrollNodeIndex,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
         device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
-        gpu_node_data: &mut Vec<ClipScrollNodeData>,
+        transform_palette: &mut TransformPalette,
         scene_properties: &SceneProperties,
     ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
         //           Restructure this to avoid the clones!
         let mut state = state.clone();
         let node_children = {
             let node = match self.nodes.get_mut(node_index.0) {
                 Some(node) => node,
                 None => return,
             };
 
-            // We set this early so that we can use it to populate the ClipChain.
-            node.node_data_index = GPUClipScrollNodeIndex(gpu_node_data.len() as u32);
-
             node.update(
                 &mut state,
                 next_coordinate_system_id,
                 device_pixel_scale,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 scene_properties,
                 &mut self.clip_chains,
             );
 
-            node.push_gpu_node_data(gpu_node_data);
+            node.push_gpu_data(transform_palette);
 
             if node.children.is_empty() {
                 return;
             }
 
             node.prepare_state_for_children(&mut state);
             node.children.clone()
         };
@@ -305,17 +310,17 @@ impl ClipScrollTree {
             self.update_node(
                 child_node_index,
                 &mut state,
                 next_coordinate_system_id,
                 device_pixel_scale,
                 clip_store,
                 resource_cache,
                 gpu_cache,
-                gpu_node_data,
+                transform_palette,
                 scene_properties,
             );
         }
     }
 
     pub fn build_clip_chains(&mut self, screen_rect: &DeviceIntRect) {
         for descriptor in &self.clip_chains_descriptors {
             // A ClipChain is an optional parent (which is another ClipChain) and a list of
@@ -341,52 +346,117 @@ impl ClipScrollTree {
             chain.parent_index = descriptor.parent;
             self.clip_chains[descriptor.index.0] = chain;
         }
     }
 
     pub fn finalize_and_apply_pending_scroll_offsets(&mut self, old_states: ScrollStates) {
         for node in &mut self.nodes {
             let external_id = match node.node_type {
-                NodeType::ScrollFrame(ScrollFrameInfo { external_id: Some(id), ..} ) => id,
+                NodeType::Spatial { kind: SpatialNodeKind::ScrollFrame(ScrollFrameInfo { external_id: Some(id), ..} ), .. } => id,
                 _ => continue,
             };
 
             if let Some(scrolling_state) = old_states.get(&external_id) {
                 node.apply_old_scrolling_state(scrolling_state);
             }
 
             if let Some((offset, clamping)) = self.pending_scroll_offsets.remove(&external_id) {
                 node.set_scroll_origin(&offset, clamping);
             }
         }
     }
 
+    // Generate the next valid TransformIndex for the CST.
+    fn next_transform_index(&mut self) -> TransformIndex {
+        let transform_index = TransformIndex(self.spatial_node_count as u32);
+        self.spatial_node_count += 1;
+        transform_index
+    }
+
     pub fn add_clip_node(
         &mut self,
         index: ClipScrollNodeIndex,
         parent_index: ClipScrollNodeIndex,
         handle: ClipSourcesHandle,
         pipeline_id: PipelineId,
     )  -> ClipChainIndex {
         let clip_chain_index = self.allocate_clip_chain();
-        let node_type = NodeType::Clip { handle, clip_chain_index, clip_chain_node: None };
-        let node = ClipScrollNode::new(pipeline_id, Some(parent_index), node_type);
+        let transform_index = self.nodes[parent_index.0].transform_index;
+
+        let node_type = NodeType::Clip {
+            handle,
+            clip_chain_index,
+            clip_chain_node: None,
+        };
+        let node = ClipScrollNode::new(
+            pipeline_id,
+            Some(parent_index),
+            node_type,
+            transform_index,
+        );
         self.add_node(node, index);
         clip_chain_index
     }
 
+    pub fn add_scroll_frame(
+        &mut self,
+        index: ClipScrollNodeIndex,
+        parent_index: ClipScrollNodeIndex,
+        external_id: Option<ExternalScrollId>,
+        pipeline_id: PipelineId,
+        frame_rect: &LayoutRect,
+        content_size: &LayoutSize,
+        scroll_sensitivity: ScrollSensitivity,
+    ) {
+        let node = ClipScrollNode::new_scroll_frame(
+            pipeline_id,
+            parent_index,
+            external_id,
+            frame_rect,
+            content_size,
+            scroll_sensitivity,
+            self.next_transform_index(),
+        );
+        self.add_node(node, index);
+    }
+
+    pub fn add_reference_frame(
+        &mut self,
+        index: ClipScrollNodeIndex,
+        parent_index: Option<ClipScrollNodeIndex>,
+        source_transform: Option<PropertyBinding<LayoutTransform>>,
+        source_perspective: Option<LayoutTransform>,
+        origin_in_parent_reference_frame: LayoutVector2D,
+        pipeline_id: PipelineId,
+    ) {
+        let node = ClipScrollNode::new_reference_frame(
+            parent_index,
+            source_transform,
+            source_perspective,
+            origin_in_parent_reference_frame,
+            pipeline_id,
+            self.next_transform_index(),
+        );
+        self.add_node(node, index);
+    }
+
     pub fn add_sticky_frame(
         &mut self,
         index: ClipScrollNodeIndex,
         parent_index: ClipScrollNodeIndex,
         sticky_frame_info: StickyFrameInfo,
         pipeline_id: PipelineId,
     ) {
-        let node = ClipScrollNode::new_sticky_frame(parent_index, sticky_frame_info, pipeline_id);
+        let node = ClipScrollNode::new_sticky_frame(
+            parent_index,
+            sticky_frame_info,
+            pipeline_id,
+            self.next_transform_index(),
+        );
         self.add_node(node, index);
     }
 
     pub fn add_clip_chain_descriptor(
         &mut self,
         parent: Option<ClipChainIndex>,
         clips: Vec<ClipScrollNodeIndex>
     ) -> ClipChainIndex {
@@ -431,43 +501,47 @@ impl ClipScrollTree {
     fn print_node<T: PrintTreePrinter>(
         &self,
         index: ClipScrollNodeIndex,
         pt: &mut T,
         clip_store: &ClipStore
     ) {
         let node = &self.nodes[index.0];
         match node.node_type {
+            NodeType::Spatial { ref kind, .. } => {
+                match *kind {
+                    SpatialNodeKind::StickyFrame(ref sticky_frame_info) => {
+                        pt.new_level(format!("StickyFrame"));
+                        pt.add_item(format!("index: {:?}", index));
+                        pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
+                    }
+                    SpatialNodeKind::ScrollFrame(scrolling_info) => {
+                        pt.new_level(format!("ScrollFrame"));
+                        pt.add_item(format!("index: {:?}", index));
+                        pt.add_item(format!("viewport: {:?}", scrolling_info.viewport_rect));
+                        pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
+                        pt.add_item(format!("scroll offset: {:?}", scrolling_info.offset));
+                    }
+                    SpatialNodeKind::ReferenceFrame(ref info) => {
+                        pt.new_level(format!("ReferenceFrame {:?}", info.resolved_transform));
+                        pt.add_item(format!("index: {:?}", index));
+                    }
+                }
+            }
             NodeType::Clip { ref handle, .. } => {
                 pt.new_level("Clip".to_owned());
 
                 pt.add_item(format!("index: {:?}", index));
                 let clips = clip_store.get(handle).clips();
                 pt.new_level(format!("Clip Sources [{}]", clips.len()));
                 for source in clips {
                     pt.add_item(format!("{:?}", source));
                 }
                 pt.end_level();
             }
-            NodeType::ReferenceFrame(ref info) => {
-                pt.new_level(format!("ReferenceFrame {:?}", info.resolved_transform));
-                pt.add_item(format!("index: {:?}", index));
-            }
-            NodeType::ScrollFrame(scrolling_info) => {
-                pt.new_level(format!("ScrollFrame"));
-                pt.add_item(format!("index: {:?}", index));
-                pt.add_item(format!("viewport: {:?}", scrolling_info.viewport_rect));
-                pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
-                pt.add_item(format!("scroll offset: {:?}", scrolling_info.offset));
-            }
-            NodeType::StickyFrame(ref sticky_frame_info) => {
-                pt.new_level(format!("StickyFrame"));
-                pt.add_item(format!("index: {:?}", index));
-                pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
-            }
             NodeType::Empty => unreachable!("Empty node remaining in ClipScrollTree."),
         }
 
         pt.add_item(format!("world_viewport_transform: {:?}", node.world_viewport_transform));
         pt.add_item(format!("world_content_transform: {:?}", node.world_content_transform));
         pt.add_item(format!("coordinate_system_id: {:?}", node.coordinate_system_id));
 
         for child_index in &node.children {
deleted file mode 100644
--- a/gfx/webrender/src/device.rs
+++ /dev/null
@@ -1,2428 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use super::shader_source;
-use api::{ColorF, ImageFormat};
-use api::{DeviceIntPoint, DeviceIntRect, DeviceUintRect, DeviceUintSize};
-use api::TextureTarget;
-#[cfg(any(feature = "debug_renderer", feature="capture"))]
-use api::ImageDescriptor;
-use euclid::Transform3D;
-use gleam::gl;
-use internal_types::{FastHashMap, RenderTargetInfo};
-use log::Level;
-use smallvec::SmallVec;
-use std::cell::RefCell;
-use std::fs::File;
-use std::io::Read;
-use std::marker::PhantomData;
-use std::mem;
-use std::ops::Add;
-use std::path::PathBuf;
-use std::ptr;
-use std::rc::Rc;
-use std::slice;
-use std::sync::Arc;
-use std::thread;
-
-#[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct FrameId(usize);
-
-impl FrameId {
-    pub fn new(value: usize) -> Self {
-        FrameId(value)
-    }
-}
-
-impl Add<usize> for FrameId {
-    type Output = FrameId;
-
-    fn add(self, other: usize) -> FrameId {
-        FrameId(self.0 + other)
-    }
-}
-
-const GL_FORMAT_RGBA: gl::GLuint = gl::RGBA;
-
-const GL_FORMAT_BGRA_GL: gl::GLuint = gl::BGRA;
-
-const GL_FORMAT_BGRA_GLES: gl::GLuint = gl::BGRA_EXT;
-
-const SHADER_VERSION_GL: &str = "#version 150\n";
-const SHADER_VERSION_GLES: &str = "#version 300 es\n";
-
-const SHADER_KIND_VERTEX: &str = "#define WR_VERTEX_SHADER\n";
-const SHADER_KIND_FRAGMENT: &str = "#define WR_FRAGMENT_SHADER\n";
-const SHADER_IMPORT: &str = "#include ";
-
-pub struct TextureSlot(pub usize);
-
-// In some places we need to temporarily bind a texture to any slot.
-const DEFAULT_TEXTURE: TextureSlot = TextureSlot(0);
-
-#[repr(u32)]
-pub enum DepthFunction {
-    #[cfg(feature = "debug_renderer")]
-    Less = gl::LESS,
-    LessEqual = gl::LEQUAL,
-}
-
-#[derive(Copy, Clone, Debug, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum TextureFilter {
-    Nearest,
-    Linear,
-    Trilinear,
-}
-
-#[derive(Debug)]
-pub enum VertexAttributeKind {
-    F32,
-    #[cfg(feature = "debug_renderer")]
-    U8Norm,
-    U16Norm,
-    I32,
-    U16,
-}
-
-#[derive(Debug)]
-pub struct VertexAttribute {
-    pub name: &'static str,
-    pub count: u32,
-    pub kind: VertexAttributeKind,
-}
-
-#[derive(Debug)]
-pub struct VertexDescriptor {
-    pub vertex_attributes: &'static [VertexAttribute],
-    pub instance_attributes: &'static [VertexAttribute],
-}
-
-enum FBOTarget {
-    Read,
-    Draw,
-}
-
-/// Method of uploading texel data from CPU to GPU.
-#[derive(Debug, Clone)]
-pub enum UploadMethod {
-    /// Just call `glTexSubImage` directly with the CPU data pointer
-    Immediate,
-    /// Accumulate the changes in PBO first before transferring to a texture.
-    PixelBuffer(VertexUsageHint),
-}
-
-/// Plain old data that can be used to initialize a texture.
-pub unsafe trait Texel: Copy {}
-unsafe impl Texel for u8 {}
-unsafe impl Texel for f32 {}
-
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub enum ReadPixelsFormat {
-    Standard(ImageFormat),
-    Rgba8,
-}
-
-pub fn get_gl_target(target: TextureTarget) -> gl::GLuint {
-    match target {
-        TextureTarget::Default => gl::TEXTURE_2D,
-        TextureTarget::Array => gl::TEXTURE_2D_ARRAY,
-        TextureTarget::Rect => gl::TEXTURE_RECTANGLE,
-        TextureTarget::External => gl::TEXTURE_EXTERNAL_OES,
-    }
-}
-
-fn supports_extension(extensions: &[String], extension: &str) -> bool {
-    extensions.iter().any(|s| s == extension)
-}
-
-fn get_shader_version(gl: &gl::Gl) -> &'static str {
-    match gl.get_type() {
-        gl::GlType::Gl => SHADER_VERSION_GL,
-        gl::GlType::Gles => SHADER_VERSION_GLES,
-    }
-}
-
-// Get a shader string by name, from the built in resources or
-// an override path, if supplied.
-fn get_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<String> {
-    if let Some(ref base) = *base_path {
-        let shader_path = base.join(&format!("{}.glsl", shader_name));
-        if shader_path.exists() {
-            let mut source = String::new();
-            File::open(&shader_path)
-                .unwrap()
-                .read_to_string(&mut source)
-                .unwrap();
-            return Some(source);
-        }
-    }
-
-    shader_source::SHADERS
-        .get(shader_name)
-        .map(|s| s.to_string())
-}
-
-// Parse a shader string for imports. Imports are recursively processed, and
-// prepended to the list of outputs.
-fn parse_shader_source(source: String, base_path: &Option<PathBuf>, output: &mut String) {
-    for line in source.lines() {
-        if line.starts_with(SHADER_IMPORT) {
-            let imports = line[SHADER_IMPORT.len() ..].split(',');
-
-            // For each import, get the source, and recurse.
-            for import in imports {
-                if let Some(include) = get_shader_source(import, base_path) {
-                    parse_shader_source(include, base_path, output);
-                }
-            }
-        } else {
-            output.push_str(line);
-            output.push_str("\n");
-        }
-    }
-}
-
-pub fn build_shader_strings(
-    gl_version_string: &str,
-    features: &str,
-    base_filename: &str,
-    override_path: &Option<PathBuf>,
-) -> (String, String) {
-    // Construct a list of strings to be passed to the shader compiler.
-    let mut vs_source = String::new();
-    let mut fs_source = String::new();
-
-    // GLSL requires that the version number comes first.
-    vs_source.push_str(gl_version_string);
-    fs_source.push_str(gl_version_string);
-
-    // Insert the shader name to make debugging easier.
-    let name_string = format!("// {}\n", base_filename);
-    vs_source.push_str(&name_string);
-    fs_source.push_str(&name_string);
-
-    // Define a constant depending on whether we are compiling VS or FS.
-    vs_source.push_str(SHADER_KIND_VERTEX);
-    fs_source.push_str(SHADER_KIND_FRAGMENT);
-
-    // Add any defines that were passed by the caller.
-    vs_source.push_str(features);
-    fs_source.push_str(features);
-
-    // Parse the main .glsl file, including any imports
-    // and append them to the list of sources.
-    let mut shared_result = String::new();
-    if let Some(shared_source) = get_shader_source(base_filename, override_path) {
-        parse_shader_source(shared_source, override_path, &mut shared_result);
-    }
-
-    vs_source.push_str(&shared_result);
-    fs_source.push_str(&shared_result);
-
-    (vs_source, fs_source)
-}
-
-pub trait FileWatcherHandler: Send {
-    fn file_changed(&self, path: PathBuf);
-}
-
-impl VertexAttributeKind {
-    fn size_in_bytes(&self) -> u32 {
-        match *self {
-            VertexAttributeKind::F32 => 4,
-            #[cfg(feature = "debug_renderer")]
-            VertexAttributeKind::U8Norm => 1,
-            VertexAttributeKind::U16Norm => 2,
-            VertexAttributeKind::I32 => 4,
-            VertexAttributeKind::U16 => 2,
-        }
-    }
-}
-
-impl VertexAttribute {
-    fn size_in_bytes(&self) -> u32 {
-        self.count * self.kind.size_in_bytes()
-    }
-
-    fn bind_to_vao(
-        &self,
-        attr_index: gl::GLuint,
-        divisor: gl::GLuint,
-        stride: gl::GLint,
-        offset: gl::GLuint,
-        gl: &gl::Gl,
-    ) {
-        gl.enable_vertex_attrib_array(attr_index);
-        gl.vertex_attrib_divisor(attr_index, divisor);
-
-        match self.kind {
-            VertexAttributeKind::F32 => {
-                gl.vertex_attrib_pointer(
-                    attr_index,
-                    self.count as gl::GLint,
-                    gl::FLOAT,
-                    false,
-                    stride,
-                    offset,
-                );
-            }
-            #[cfg(feature = "debug_renderer")]
-            VertexAttributeKind::U8Norm => {
-                gl.vertex_attrib_pointer(
-                    attr_index,
-                    self.count as gl::GLint,
-                    gl::UNSIGNED_BYTE,
-                    true,
-                    stride,
-                    offset,
-                );
-            }
-            VertexAttributeKind::U16Norm => {
-                gl.vertex_attrib_pointer(
-                    attr_index,
-                    self.count as gl::GLint,
-                    gl::UNSIGNED_SHORT,
-                    true,
-                    stride,
-                    offset,
-                );
-            }
-            VertexAttributeKind::I32 => {
-                gl.vertex_attrib_i_pointer(
-                    attr_index,
-                    self.count as gl::GLint,
-                    gl::INT,
-                    stride,
-                    offset,
-                );
-            }
-            VertexAttributeKind::U16 => {
-                gl.vertex_attrib_i_pointer(
-                    attr_index,
-                    self.count as gl::GLint,
-                    gl::UNSIGNED_SHORT,
-                    stride,
-                    offset,
-                );
-            }
-        }
-    }
-}
-
-impl VertexDescriptor {
-    fn instance_stride(&self) -> u32 {
-        self.instance_attributes
-            .iter()
-            .map(|attr| attr.size_in_bytes())
-            .sum()
-    }
-
-    fn bind_attributes(
-        attributes: &[VertexAttribute],
-        start_index: usize,
-        divisor: u32,
-        gl: &gl::Gl,
-        vbo: VBOId,
-    ) {
-        vbo.bind(gl);
-
-        let stride: u32 = attributes
-            .iter()
-            .map(|attr| attr.size_in_bytes())
-            .sum();
-
-        let mut offset = 0;
-        for (i, attr) in attributes.iter().enumerate() {
-            let attr_index = (start_index + i) as gl::GLuint;
-            attr.bind_to_vao(attr_index, divisor, stride as _, offset, gl);
-            offset += attr.size_in_bytes();
-        }
-    }
-
-    fn bind(&self, gl: &gl::Gl, main: VBOId, instance: VBOId) {
-        Self::bind_attributes(self.vertex_attributes, 0, 0, gl, main);
-
-        if !self.instance_attributes.is_empty() {
-            Self::bind_attributes(
-                self.instance_attributes,
-                self.vertex_attributes.len(),
-                1, gl, instance,
-            );
-        }
-    }
-}
-
-impl VBOId {
-    fn bind(&self, gl: &gl::Gl) {
-        gl.bind_buffer(gl::ARRAY_BUFFER, self.0);
-    }
-}
-
-impl IBOId {
-    fn bind(&self, gl: &gl::Gl) {
-        gl.bind_buffer(gl::ELEMENT_ARRAY_BUFFER, self.0);
-    }
-}
-
-impl FBOId {
-    fn bind(&self, gl: &gl::Gl, target: FBOTarget) {
-        let target = match target {
-            FBOTarget::Read => gl::READ_FRAMEBUFFER,
-            FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
-        };
-        gl.bind_framebuffer(target, self.0);
-    }
-}
-
-pub struct Stream<'a> {
-    attributes: &'a [VertexAttribute],
-    vbo: VBOId,
-}
-
-pub struct VBO<V> {
-    id: gl::GLuint,
-    target: gl::GLenum,
-    allocated_count: usize,
-    marker: PhantomData<V>,
-}
-
-impl<V> VBO<V> {
-    pub fn allocated_count(&self) -> usize {
-        self.allocated_count
-    }
-
-    pub fn stream_with<'a>(&self, attributes: &'a [VertexAttribute]) -> Stream<'a> {
-        debug_assert_eq!(
-            mem::size_of::<V>(),
-            attributes.iter().map(|a| a.size_in_bytes() as usize).sum::<usize>()
-        );
-        Stream {
-            attributes,
-            vbo: VBOId(self.id),
-        }
-    }
-}
-
-impl<T> Drop for VBO<T> {
-    fn drop(&mut self) {
-        debug_assert!(thread::panicking() || self.id == 0);
-    }
-}
-
-#[cfg_attr(feature = "replay", derive(Clone))]
-pub struct ExternalTexture {
-    id: gl::GLuint,
-    target: gl::GLuint,
-}
-
-impl ExternalTexture {
-    pub fn new(id: u32, target: TextureTarget) -> Self {
-        ExternalTexture {
-            id,
-            target: get_gl_target(target),
-        }
-    }
-
-    #[cfg(feature = "replay")]
-    pub fn internal_id(&self) -> gl::GLuint {
-        self.id
-    }
-}
-
-pub struct Texture {
-    id: gl::GLuint,
-    target: gl::GLuint,
-    layer_count: i32,
-    format: ImageFormat,
-    width: u32,
-    height: u32,
-    filter: TextureFilter,
-    render_target: Option<RenderTargetInfo>,
-    fbo_ids: Vec<FBOId>,
-    depth_rb: Option<RBOId>,
-    last_frame_used: FrameId,
-}
-
-impl Texture {
-    pub fn get_dimensions(&self) -> DeviceUintSize {
-        DeviceUintSize::new(self.width, self.height)
-    }
-
-    pub fn get_render_target_layer_count(&self) -> usize {
-        self.fbo_ids.len()
-    }
-
-    pub fn get_layer_count(&self) -> i32 {
-        self.layer_count
-    }
-
-    pub fn get_format(&self) -> ImageFormat {
-        self.format
-    }
-
-    #[cfg(any(feature = "debug_renderer", feature = "capture"))]
-    pub fn get_filter(&self) -> TextureFilter {
-        self.filter
-    }
-
-    #[cfg(any(feature = "debug_renderer", feature = "capture"))]
-    pub fn get_render_target(&self) -> Option<RenderTargetInfo> {
-        self.render_target.clone()
-    }
-
-    pub fn has_depth(&self) -> bool {
-        self.depth_rb.is_some()
-    }
-
-    pub fn get_rt_info(&self) -> Option<&RenderTargetInfo> {
-        self.render_target.as_ref()
-    }
-
-    pub fn used_in_frame(&self, frame_id: FrameId) -> bool {
-        self.last_frame_used == frame_id
-    }
-
-    #[cfg(feature = "replay")]
-    pub fn into_external(mut self) -> ExternalTexture {
-        let ext = ExternalTexture {
-            id: self.id,
-            target: self.target,
-        };
-        self.id = 0; // don't complain, moved out
-        ext
-    }
-}
-
-impl Drop for Texture {
-    fn drop(&mut self) {
-        debug_assert!(thread::panicking() || self.id == 0);
-    }
-}
-
-pub struct Program {
-    id: gl::GLuint,
-    u_transform: gl::GLint,
-    u_device_pixel_ratio: gl::GLint,
-    u_mode: gl::GLint,
-}
-
-impl Drop for Program {
-    fn drop(&mut self) {
-        debug_assert!(
-            thread::panicking() || self.id == 0,
-            "renderer::deinit not called"
-        );
-    }
-}
-
-pub struct CustomVAO {
-    id: gl::GLuint,
-}
-
-impl Drop for CustomVAO {
-    fn drop(&mut self) {
-        debug_assert!(
-            thread::panicking() || self.id == 0,
-            "renderer::deinit not called"
-        );
-    }
-}
-
-pub struct VAO {
-    id: gl::GLuint,
-    ibo_id: IBOId,
-    main_vbo_id: VBOId,
-    instance_vbo_id: VBOId,
-    instance_stride: usize,
-    owns_vertices_and_indices: bool,
-}
-
-impl Drop for VAO {
-    fn drop(&mut self) {
-        debug_assert!(
-            thread::panicking() || self.id == 0,
-            "renderer::deinit not called"
-        );
-    }
-}
-
-pub struct PBO {
-    id: gl::GLuint,
-}
-
-impl Drop for PBO {
-    fn drop(&mut self) {
-        debug_assert!(
-            thread::panicking() || self.id == 0,
-            "renderer::deinit not called"
-        );
-    }
-}
-
-#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
-pub struct FBOId(gl::GLuint);
-
-#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
-pub struct RBOId(gl::GLuint);
-
-#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
-pub struct VBOId(gl::GLuint);
-
-#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
-struct IBOId(gl::GLuint);
-
-#[derive(Clone, PartialEq, Eq, Hash, Debug)]
-#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
-pub struct ProgramSources {
-    renderer_name: String,
-    vs_source: String,
-    fs_source: String,
-}
-
-impl ProgramSources {
-    fn new(renderer_name: String, vs_source: String, fs_source: String) -> Self {
-        ProgramSources {
-            renderer_name,
-            vs_source,
-            fs_source,
-        }
-    }
-}
-
-#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
-pub struct ProgramBinary {
-    binary: Vec<u8>,
-    format: gl::GLenum,
-    #[cfg(feature = "serialize_program")]
-    sources: ProgramSources,
-}
-
-impl ProgramBinary {
-    #[allow(unused_variables)]
-    fn new(binary: Vec<u8>,
-           format: gl::GLenum,
-           sources: &ProgramSources) -> Self {
-        ProgramBinary {
-            binary,
-            format,
-            #[cfg(feature = "serialize_program")]
-            sources: sources.clone(),
-        }
-    }
-}
-
-/// The interfaces that an application can implement to handle ProgramCache update
-pub trait ProgramCacheObserver {
-    fn notify_binary_added(&self, program_binary: &Arc<ProgramBinary>);
-    fn notify_program_binary_failed(&self, program_binary: &Arc<ProgramBinary>);
-}
-
-pub struct ProgramCache {
-    binaries: RefCell<FastHashMap<ProgramSources, Arc<ProgramBinary>>>,
-
-    /// Optional trait object that allows the client
-    /// application to handle ProgramCache updating
-    program_cache_handler: Option<Box<ProgramCacheObserver>>,
-}
-
-impl ProgramCache {
-    pub fn new(program_cache_observer: Option<Box<ProgramCacheObserver>>) -> Rc<Self> {
-        Rc::new(
-            ProgramCache {
-                binaries: RefCell::new(FastHashMap::default()),
-                program_cache_handler: program_cache_observer,
-            }
-        )
-    }
-    /// Load ProgramBinary to ProgramCache.
-    /// The function is typically used to load ProgramBinary from disk.
-    #[cfg(feature = "serialize_program")]
-    pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) {
-        let sources = program_binary.sources.clone();
-        self.binaries.borrow_mut().insert(sources, program_binary);
-    }
-}
-
-#[derive(Debug, Copy, Clone)]
-pub enum VertexUsageHint {
-    Static,
-    Dynamic,
-    Stream,
-}
-
-impl VertexUsageHint {
-    fn to_gl(&self) -> gl::GLuint {
-        match *self {
-            VertexUsageHint::Static => gl::STATIC_DRAW,
-            VertexUsageHint::Dynamic => gl::DYNAMIC_DRAW,
-            VertexUsageHint::Stream => gl::STREAM_DRAW,
-        }
-    }
-}
-
-#[derive(Copy, Clone, Debug)]
-pub struct UniformLocation(gl::GLint);
-
-impl UniformLocation {
-    pub const INVALID: Self = UniformLocation(-1);
-}
-
-#[cfg(feature = "debug_renderer")]
-pub struct Capabilities {
-    pub supports_multisampling: bool,
-}
-
-#[derive(Clone, Debug)]
-pub enum ShaderError {
-    Compilation(String, String), // name, error message
-    Link(String, String),        // name, error message
-}
-
-pub struct Device {
-    gl: Rc<gl::Gl>,
-    // device state
-    bound_textures: [gl::GLuint; 16],
-    bound_program: gl::GLuint,
-    bound_vao: gl::GLuint,
-    bound_read_fbo: FBOId,
-    bound_draw_fbo: FBOId,
-    program_mode_id: UniformLocation,
-    default_read_fbo: gl::GLuint,
-    default_draw_fbo: gl::GLuint,
-
-    device_pixel_ratio: f32,
-    upload_method: UploadMethod,
-
-    // HW or API capabilities
-    #[cfg(feature = "debug_renderer")]
-    capabilities: Capabilities,
-
-    bgra_format: gl::GLuint,
-
-    // debug
-    inside_frame: bool,
-
-    // resources
-    resource_override_path: Option<PathBuf>,
-
-    max_texture_size: u32,
-    renderer_name: String,
-    cached_programs: Option<Rc<ProgramCache>>,
-
-    // Frame counter. This is used to map between CPU
-    // frames and GPU frames.
-    frame_id: FrameId,
-
-    // GL extensions
-    extensions: Vec<String>,
-}
-
-impl Device {
-    pub fn new(
-        gl: Rc<gl::Gl>,
-        resource_override_path: Option<PathBuf>,
-        upload_method: UploadMethod,
-        _file_changed_handler: Box<FileWatcherHandler>,
-        cached_programs: Option<Rc<ProgramCache>>,
-    ) -> Device {
-        let mut max_texture_size = [0];
-        unsafe {
-            gl.get_integer_v(gl::MAX_TEXTURE_SIZE, &mut max_texture_size);
-        }
-        let max_texture_size = max_texture_size[0] as u32;
-        let renderer_name = gl.get_string(gl::RENDERER);
-
-        let mut extension_count = [0];
-        unsafe {
-            gl.get_integer_v(gl::NUM_EXTENSIONS, &mut extension_count);
-        }
-        let extension_count = extension_count[0] as gl::GLuint;
-        let mut extensions = Vec::new();
-        for i in 0 .. extension_count {
-            extensions.push(gl.get_string_i(gl::EXTENSIONS, i));
-        }
-
-        let supports_bgra = supports_extension(&extensions, "GL_EXT_texture_format_BGRA8888");
-        let bgra_format = match gl.get_type() {
-            gl::GlType::Gl => GL_FORMAT_BGRA_GL,
-            gl::GlType::Gles => if supports_bgra {
-                GL_FORMAT_BGRA_GLES
-            } else {
-                GL_FORMAT_RGBA
-            }
-        };
-
-        Device {
-            gl,
-            resource_override_path,
-            // This is initialized to 1 by default, but it is reset
-            // at the beginning of each frame in `Renderer::bind_frame_data`.
-            device_pixel_ratio: 1.0,
-            upload_method,
-            inside_frame: false,
-
-            #[cfg(feature = "debug_renderer")]
-            capabilities: Capabilities {
-                supports_multisampling: false, //TODO
-            },
-
-            bgra_format,
-
-            bound_textures: [0; 16],
-            bound_program: 0,
-            bound_vao: 0,
-            bound_read_fbo: FBOId(0),
-            bound_draw_fbo: FBOId(0),
-            program_mode_id: UniformLocation::INVALID,
-            default_read_fbo: 0,
-            default_draw_fbo: 0,
-
-            max_texture_size,
-            renderer_name,
-            cached_programs,
-            frame_id: FrameId(0),
-            extensions,
-        }
-    }
-
-    pub fn gl(&self) -> &gl::Gl {
-        &*self.gl
-    }
-
-    pub fn rc_gl(&self) -> &Rc<gl::Gl> {
-        &self.gl
-    }
-
-    pub fn set_device_pixel_ratio(&mut self, ratio: f32) {
-        self.device_pixel_ratio = ratio;
-    }
-
-    pub fn update_program_cache(&mut self, cached_programs: Rc<ProgramCache>) {
-        self.cached_programs = Some(cached_programs);
-    }
-
-    pub fn max_texture_size(&self) -> u32 {
-        self.max_texture_size
-    }
-
-    #[cfg(feature = "debug_renderer")]
-    pub fn get_capabilities(&self) -> &Capabilities {
-        &self.capabilities
-    }
-
-    pub fn reset_state(&mut self) {
-        self.bound_textures = [0; 16];
-        self.bound_vao = 0;
-        self.bound_read_fbo = FBOId(0);
-        self.bound_draw_fbo = FBOId(0);
-    }
-
-    #[cfg(debug_assertions)]
-    fn print_shader_errors(source: &str, log: &str) {
-        // hacky way to extract the offending lines
-        if !log.starts_with("0:") {
-            return;
-        }
-        let end_pos = match log[2..].chars().position(|c| !c.is_digit(10)) {
-            Some(pos) => 2 + pos,
-            None => return,
-        };
-        let base_line_number = match log[2 .. end_pos].parse::<usize>() {
-            Ok(number) if number >= 2 => number - 2,
-            _ => return,
-        };
-        for (line, prefix) in source.lines().skip(base_line_number).zip(&["|",">","|"]) {
-            error!("{}\t{}", prefix, line);
-        }
-    }
-
-    pub fn compile_shader(
-        gl: &gl::Gl,
-        name: &str,
-        shader_type: gl::GLenum,
-        source: &String,
-    ) -> Result<gl::GLuint, ShaderError> {
-        debug!("compile {}", name);
-        let id = gl.create_shader(shader_type);
-        gl.shader_source(id, &[source.as_bytes()]);
-        gl.compile_shader(id);
-        let log = gl.get_shader_info_log(id);
-        let mut status = [0];
-        unsafe {
-            gl.get_shader_iv(id, gl::COMPILE_STATUS, &mut status);
-        }
-        if status[0] == 0 {
-            error!("Failed to compile shader: {}\n{}", name, log);
-            #[cfg(debug_assertions)]
-            Self::print_shader_errors(source, &log);
-            Err(ShaderError::Compilation(name.to_string(), log))
-        } else {
-            if !log.is_empty() {
-                warn!("Warnings detected on shader: {}\n{}", name, log);
-            }
-            Ok(id)
-        }
-    }
-
-    pub fn begin_frame(&mut self) -> FrameId {
-        debug_assert!(!self.inside_frame);
-        self.inside_frame = true;
-
-        // Retrieve the currently set FBO.
-        let mut default_read_fbo = [0];
-        unsafe {
-            self.gl.get_integer_v(gl::READ_FRAMEBUFFER_BINDING, &mut default_read_fbo);
-        }
-        self.default_read_fbo = default_read_fbo[0] as gl::GLuint;
-        let mut default_draw_fbo = [0];
-        unsafe {
-            self.gl.get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING, &mut default_draw_fbo);
-        }
-        self.default_draw_fbo = default_draw_fbo[0] as gl::GLuint;
-
-        // Texture state
-        for i in 0 .. self.bound_textures.len() {
-            self.bound_textures[i] = 0;
-            self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint);
-            self.gl.bind_texture(gl::TEXTURE_2D, 0);
-        }
-
-        // Shader state
-        self.bound_program = 0;
-        self.program_mode_id = UniformLocation::INVALID;
-        self.gl.use_program(0);
-
-        // Vertex state
-        self.bound_vao = 0;
-        self.gl.bind_vertex_array(0);
-
-        // FBO state
-        self.bound_read_fbo = FBOId(self.default_read_fbo);
-        self.bound_draw_fbo = FBOId(self.default_draw_fbo);
-
-        // Pixel op state
-        self.gl.pixel_store_i(gl::UNPACK_ALIGNMENT, 1);
-        self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
-
-        // Default is sampler 0, always
-        self.gl.active_texture(gl::TEXTURE0);
-
-        self.frame_id
-    }
-
-    fn bind_texture_impl(&mut self, slot: TextureSlot, id: gl::GLuint, target: gl::GLenum) {
-        debug_assert!(self.inside_frame);
-
-        if self.bound_textures[slot.0] != id {
-            self.bound_textures[slot.0] = id;
-            self.gl.active_texture(gl::TEXTURE0 + slot.0 as gl::GLuint);
-            self.gl.bind_texture(target, id);
-            self.gl.active_texture(gl::TEXTURE0);
-        }
-    }
-
-    pub fn bind_texture<S>(&mut self, sampler: S, texture: &Texture)
-    where
-        S: Into<TextureSlot>,
-    {
-        self.bind_texture_impl(sampler.into(), texture.id, texture.target);
-    }
-
-    pub fn bind_external_texture<S>(&mut self, sampler: S, external_texture: &ExternalTexture)
-    where
-        S: Into<TextureSlot>,
-    {
-        self.bind_texture_impl(sampler.into(), external_texture.id, external_texture.target);
-    }
-
-    pub fn bind_read_target_impl(&mut self, fbo_id: FBOId) {
-        debug_assert!(self.inside_frame);
-
-        if self.bound_read_fbo != fbo_id {
-            self.bound_read_fbo = fbo_id;
-            fbo_id.bind(self.gl(), FBOTarget::Read);
-        }
-    }
-
-    pub fn bind_read_target(&mut self, texture_and_layer: Option<(&Texture, i32)>) {
-        let fbo_id = texture_and_layer.map_or(FBOId(self.default_read_fbo), |texture_and_layer| {
-            texture_and_layer.0.fbo_ids[texture_and_layer.1 as usize]
-        });
-
-        self.bind_read_target_impl(fbo_id)
-    }
-
-    fn bind_draw_target_impl(&mut self, fbo_id: FBOId) {
-        debug_assert!(self.inside_frame);
-
-        if self.bound_draw_fbo != fbo_id {
-            self.bound_draw_fbo = fbo_id;
-            fbo_id.bind(self.gl(), FBOTarget::Draw);
-        }
-    }
-
-    pub fn bind_draw_target(
-        &mut self,
-        texture_and_layer: Option<(&Texture, i32)>,
-        dimensions: Option<DeviceUintSize>,
-    ) {
-        let fbo_id = texture_and_layer.map_or(FBOId(self.default_draw_fbo), |texture_and_layer| {
-            texture_and_layer.0.fbo_ids[texture_and_layer.1 as usize]
-        });
-
-        self.bind_draw_target_impl(fbo_id);
-
-        if let Some(dimensions) = dimensions {
-            self.gl.viewport(
-                0,
-                0,
-                dimensions.width as _,
-                dimensions.height as _,
-            );
-        }
-    }
-
-    pub fn create_fbo_for_external_texture(&mut self, texture_id: u32) -> FBOId {
-        let fbo = FBOId(self.gl.gen_framebuffers(1)[0]);
-        fbo.bind(self.gl(), FBOTarget::Draw);
-        self.gl.framebuffer_texture_2d(
-            gl::DRAW_FRAMEBUFFER,
-            gl::COLOR_ATTACHMENT0,
-            gl::TEXTURE_2D,
-            texture_id,
-            0,
-        );
-        self.bound_draw_fbo.bind(self.gl(), FBOTarget::Draw);
-        fbo
-    }
-
-    pub fn delete_fbo(&mut self, fbo: FBOId) {
-        self.gl.delete_framebuffers(&[fbo.0]);
-    }
-
-    pub fn bind_external_draw_target(&mut self, fbo_id: FBOId) {
-        debug_assert!(self.inside_frame);
-
-        if self.bound_draw_fbo != fbo_id {
-            self.bound_draw_fbo = fbo_id;
-            fbo_id.bind(self.gl(), FBOTarget::Draw);
-        }
-    }
-
-    pub fn bind_program(&mut self, program: &Program) {
-        debug_assert!(self.inside_frame);
-
-        if self.bound_program != program.id {
-            self.gl.use_program(program.id);
-            self.bound_program = program.id;
-            self.program_mode_id = UniformLocation(program.u_mode);
-        }
-    }
-
-    pub fn create_texture(
-        &mut self,
-        target: TextureTarget,
-        format: ImageFormat,
-    ) -> Texture {
-        Texture {
-            id: self.gl.gen_textures(1)[0],
-            target: get_gl_target(target),
-            width: 0,
-            height: 0,
-            layer_count: 0,
-            format,
-            filter: TextureFilter::Nearest,
-            render_target: None,
-            fbo_ids: vec![],
-            depth_rb: None,
-            last_frame_used: self.frame_id,
-        }
-    }
-
-    fn set_texture_parameters(&mut self, target: gl::GLuint, filter: TextureFilter) {
-        let mag_filter = match filter {
-            TextureFilter::Nearest => gl::NEAREST,
-            TextureFilter::Linear | TextureFilter::Trilinear => gl::LINEAR,
-        };
-
-        let min_filter = match filter {
-            TextureFilter::Nearest => gl::NEAREST,
-            TextureFilter::Linear => gl::LINEAR,
-            TextureFilter::Trilinear => gl::LINEAR_MIPMAP_LINEAR,
-        };
-
-        self.gl
-            .tex_parameter_i(target, gl::TEXTURE_MAG_FILTER, mag_filter as gl::GLint);
-        self.gl
-            .tex_parameter_i(target, gl::TEXTURE_MIN_FILTER, min_filter as gl::GLint);
-
-        self.gl
-            .tex_parameter_i(target, gl::TEXTURE_WRAP_S, gl::CLAMP_TO_EDGE as gl::GLint);
-        self.gl
-            .tex_parameter_i(target, gl::TEXTURE_WRAP_T, gl::CLAMP_TO_EDGE as gl::GLint);
-    }
-
-    /// Resizes a texture with enabled render target views,
-    /// preserves the data by blitting the old texture contents over.
-    pub fn resize_renderable_texture(
-        &mut self,
-        texture: &mut Texture,
-        new_size: DeviceUintSize,
-    ) {
-        debug_assert!(self.inside_frame);
-
-        let old_size = texture.get_dimensions();
-        let old_fbos = mem::replace(&mut texture.fbo_ids, Vec::new());
-        let old_texture_id = mem::replace(&mut texture.id, self.gl.gen_textures(1)[0]);
-
-        texture.width = new_size.width;
-        texture.height = new_size.height;
-        let rt_info = texture.render_target
-            .clone()
-            .expect("Only renderable textures are expected for resize here");
-
-        self.bind_texture(DEFAULT_TEXTURE, texture);
-        self.set_texture_parameters(texture.target, texture.filter);
-        self.update_target_storage::<u8>(texture, &rt_info, true, None);
-
-        let rect = DeviceIntRect::new(DeviceIntPoint::zero(), old_size.to_i32());
-        for (read_fbo, &draw_fbo) in old_fbos.into_iter().zip(&texture.fbo_ids) {
-            self.bind_read_target_impl(read_fbo);
-            self.bind_draw_target_impl(draw_fbo);
-            self.blit_render_target(rect, rect);
-            self.delete_fbo(read_fbo);
-        }
-        self.gl.delete_textures(&[old_texture_id]);
-        self.bind_read_target(None);
-    }
-
-    pub fn init_texture<T: Texel>(
-        &mut self,
-        texture: &mut Texture,
-        mut width: u32,
-        mut height: u32,
-        filter: TextureFilter,
-        render_target: Option<RenderTargetInfo>,
-        layer_count: i32,
-        pixels: Option<&[T]>,
-    ) {
-        debug_assert!(self.inside_frame);
-
-        if width > self.max_texture_size || height > self.max_texture_size {
-            error!("Attempting to allocate a texture of size {}x{} above the limit, trimming", width, height);
-            width = width.min(self.max_texture_size);
-            height = height.min(self.max_texture_size);
-        }
-
-        let is_resized = texture.width != width || texture.height != height;
-
-        texture.width = width;
-        texture.height = height;
-        texture.filter = filter;
-        texture.layer_count = layer_count;
-        texture.render_target = render_target;
-        texture.last_frame_used = self.frame_id;
-
-        self.bind_texture(DEFAULT_TEXTURE, texture);
-        self.set_texture_parameters(texture.target, filter);
-
-        match render_target {
-            Some(info) => {
-                self.update_target_storage(texture, &info, is_resized, pixels);
-            }
-            None => {
-                self.update_texture_storage(texture, pixels);
-            }
-        }
-    }
-
-    /// Updates the render target storage for the texture, creating FBOs as required.
-    fn update_target_storage<T: Texel>(
-        &mut self,
-        texture: &mut Texture,
-        rt_info: &RenderTargetInfo,
-        is_resized: bool,
-        pixels: Option<&[T]>,
-    ) {
-        assert!(texture.layer_count > 0 || texture.width + texture.height == 0);
-
-        let needed_layer_count = texture.layer_count - texture.fbo_ids.len() as i32;
-        let allocate_color = needed_layer_count != 0 || is_resized || pixels.is_some();
-
-        if allocate_color {
-            let desc = self.gl_describe_format(texture.format);
-            match texture.target {
-                gl::TEXTURE_2D_ARRAY => {
-                    self.gl.tex_image_3d(
-                        texture.target,
-                        0,
-                        desc.internal,
-                        texture.width as _,
-                        texture.height as _,
-                        texture.layer_count,
-                        0,
-                        desc.external,
-                        desc.pixel_type,
-                        pixels.map(texels_to_u8_slice),
-                    )
-                }
-                _ => {
-                    assert_eq!(texture.layer_count, 1);
-                    self.gl.tex_image_2d(
-                        texture.target,
-                        0,
-                        desc.internal,
-                        texture.width as _,
-                        texture.height as _,
-                        0,
-                        desc.external,
-                        desc.pixel_type,
-                        pixels.map(texels_to_u8_slice),
-                    )
-                }
-            }
-        }
-
-        if needed_layer_count > 0 {
-            // Create more framebuffers to fill the gap
-            let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
-            texture
-                .fbo_ids
-                .extend(new_fbos.into_iter().map(FBOId));
-        } else if needed_layer_count < 0 {
-            // Remove extra framebuffers
-            for old in texture.fbo_ids.drain(texture.layer_count as usize ..) {
-                self.gl.delete_framebuffers(&[old.0]);
-            }
-        }
-
-        let (mut depth_rb, allocate_depth) = match texture.depth_rb {
-            Some(rbo) => (rbo.0, is_resized || !rt_info.has_depth),
-            None if rt_info.has_depth => {
-                let renderbuffer_ids = self.gl.gen_renderbuffers(1);
-                let depth_rb = renderbuffer_ids[0];
-                texture.depth_rb = Some(RBOId(depth_rb));
-                (depth_rb, true)
-            },
-            None => (0, false),
-        };
-
-        if allocate_depth {
-            if rt_info.has_depth {
-                self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
-                self.gl.renderbuffer_storage(
-                    gl::RENDERBUFFER,
-                    gl::DEPTH_COMPONENT24,
-                    texture.width as _,
-                    texture.height as _,
-                );
-            } else {
-                self.gl.delete_renderbuffers(&[depth_rb]);
-                depth_rb = 0;
-                texture.depth_rb = None;
-            }
-        }
-
-        if allocate_color || allocate_depth {
-            let original_bound_fbo = self.bound_draw_fbo;
-            for (fbo_index, &fbo_id) in texture.fbo_ids.iter().enumerate() {
-                self.bind_external_draw_target(fbo_id);
-                match texture.target {
-                    gl::TEXTURE_2D_ARRAY => {
-                        self.gl.framebuffer_texture_layer(
-                            gl::DRAW_FRAMEBUFFER,
-                            gl::COLOR_ATTACHMENT0,
-                            texture.id,
-                            0,
-                            fbo_index as _,
-                        )
-                    }
-                    _ => {
-                        assert_eq!(fbo_index, 0);
-                        self.gl.framebuffer_texture_2d(
-                            gl::DRAW_FRAMEBUFFER,
-                            gl::COLOR_ATTACHMENT0,
-                            texture.target,
-                            texture.id,
-                            0,
-                        )
-                    }
-                }
-
-                self.gl.framebuffer_renderbuffer(
-                    gl::DRAW_FRAMEBUFFER,
-                    gl::DEPTH_ATTACHMENT,
-                    gl::RENDERBUFFER,
-                    depth_rb,
-                );
-            }
-            self.bind_external_draw_target(original_bound_fbo);
-        }
-    }
-
-    fn update_texture_storage<T: Texel>(&mut self, texture: &Texture, pixels: Option<&[T]>) {
-        let desc = self.gl_describe_format(texture.format);
-        match texture.target {
-            gl::TEXTURE_2D_ARRAY => {
-                self.gl.tex_image_3d(
-                    gl::TEXTURE_2D_ARRAY,
-                    0,
-                    desc.internal,
-                    texture.width as _,
-                    texture.height as _,
-                    texture.layer_count,
-                    0,
-                    desc.external,
-                    desc.pixel_type,
-                    pixels.map(texels_to_u8_slice),
-                );
-            }
-            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
-                self.gl.tex_image_2d(
-                    texture.target,
-                    0,
-                    desc.internal,
-                    texture.width as _,
-                    texture.height as _,
-                    0,
-                    desc.external,
-                    desc.pixel_type,
-                    pixels.map(texels_to_u8_slice),
-                );
-            }
-            _ => panic!("BUG: Unexpected texture target!"),
-        }
-    }
-
-    pub fn blit_render_target(&mut self, src_rect: DeviceIntRect, dest_rect: DeviceIntRect) {
-        debug_assert!(self.inside_frame);
-
-        self.gl.blit_framebuffer(
-            src_rect.origin.x,
-            src_rect.origin.y,
-            src_rect.origin.x + src_rect.size.width,
-            src_rect.origin.y + src_rect.size.height,
-            dest_rect.origin.x,
-            dest_rect.origin.y,
-            dest_rect.origin.x + dest_rect.size.width,
-            dest_rect.origin.y + dest_rect.size.height,
-            gl::COLOR_BUFFER_BIT,
-            gl::LINEAR,
-        );
-    }
-
-    fn free_texture_storage_impl(&mut self, target: gl::GLenum, desc: FormatDesc) {
-        match target {
-            gl::TEXTURE_2D_ARRAY => {
-                self.gl.tex_image_3d(
-                    gl::TEXTURE_2D_ARRAY,
-                    0,
-                    desc.internal,
-                    0,
-                    0,
-                    0,
-                    0,
-                    desc.external,
-                    desc.pixel_type,
-                    None,
-                );
-            }
-            _ => {
-                self.gl.tex_image_2d(
-                    target,
-                    0,
-                    desc.internal,
-                    0,
-                    0,
-                    0,
-                    desc.external,
-                    desc.pixel_type,
-                    None,
-                );
-            }
-        }
-    }
-
-    pub fn free_texture_storage(&mut self, texture: &mut Texture) {
-        debug_assert!(self.inside_frame);
-
-        if texture.width + texture.height == 0 {
-            return;
-        }
-
-        self.bind_texture(DEFAULT_TEXTURE, texture);
-        let desc = self.gl_describe_format(texture.format);
-
-        self.free_texture_storage_impl(texture.target, desc);
-
-        if let Some(RBOId(depth_rb)) = texture.depth_rb.take() {
-            self.gl.delete_renderbuffers(&[depth_rb]);
-        }
-
-        if !texture.fbo_ids.is_empty() {
-            let fbo_ids: Vec<_> = texture
-                .fbo_ids
-                .drain(..)
-                .map(|FBOId(fbo_id)| fbo_id)
-                .collect();
-            self.gl.delete_framebuffers(&fbo_ids[..]);
-        }
-
-        texture.width = 0;
-        texture.height = 0;
-        texture.layer_count = 0;
-    }
-
-    pub fn delete_texture(&mut self, mut texture: Texture) {
-        self.free_texture_storage(&mut texture);
-        self.gl.delete_textures(&[texture.id]);
-
-        for bound_texture in &mut self.bound_textures {
-            if *bound_texture == texture.id {
-                *bound_texture = 0
-            }
-        }
-
-        texture.id = 0;
-    }
-
-    #[cfg(feature = "replay")]
-    pub fn delete_external_texture(&mut self, mut external: ExternalTexture) {
-        self.bind_external_texture(DEFAULT_TEXTURE, &external);
-        //Note: the format descriptor here doesn't really matter
-        self.free_texture_storage_impl(external.target, FormatDesc {
-            internal: gl::R8 as _,
-            external: gl::RED,
-            pixel_type: gl::UNSIGNED_BYTE,
-        });
-        self.gl.delete_textures(&[external.id]);
-        external.id = 0;
-    }
-
-    pub fn delete_program(&mut self, mut program: Program) {
-        self.gl.delete_program(program.id);
-        program.id = 0;
-    }
-
-    pub fn create_program(
-        &mut self,
-        base_filename: &str,
-        features: &str,
-        descriptor: &VertexDescriptor,
-    ) -> Result<Program, ShaderError> {
-        debug_assert!(self.inside_frame);
-
-        let gl_version_string = get_shader_version(&*self.gl);
-
-        let (vs_source, fs_source) = build_shader_strings(
-            gl_version_string,
-            features,
-            base_filename,
-            &self.resource_override_path,
-        );
-
-        let sources = ProgramSources::new(self.renderer_name.clone(), vs_source, fs_source);
-
-        // Create program
-        let pid = self.gl.create_program();
-
-        let mut loaded = false;
-
-        if let Some(ref cached_programs) = self.cached_programs {
-            if let Some(binary) = cached_programs.binaries.borrow().get(&sources)
-            {
-                self.gl.program_binary(pid, binary.format, &binary.binary);
-
-                let mut link_status = [0];
-                unsafe {
-                    self.gl.get_program_iv(pid, gl::LINK_STATUS, &mut link_status);
-                }
-                if link_status[0] == 0 {
-                    let error_log = self.gl.get_program_info_log(pid);
-                    error!(
-                      "Failed to load a program object with a program binary: {} renderer {}\n{}",
-                      base_filename,
-                      self.renderer_name,
-                      error_log
-                    );
-                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
-                        program_cache_handler.notify_program_binary_failed(&binary);
-                    }
-                } else {
-                    loaded = true;
-                }
-            }
-        }
-
-        if loaded == false {
-            // Compile the vertex shader
-            let vs_id =
-                match Device::compile_shader(&*self.gl, base_filename, gl::VERTEX_SHADER, &sources.vs_source) {
-                    Ok(vs_id) => vs_id,
-                    Err(err) => return Err(err),
-                };
-
-            // Compiler the fragment shader
-            let fs_id =
-                match Device::compile_shader(&*self.gl, base_filename, gl::FRAGMENT_SHADER, &sources.fs_source) {
-                    Ok(fs_id) => fs_id,
-                    Err(err) => {
-                        self.gl.delete_shader(vs_id);
-                        return Err(err);
-                    }
-                };
-
-            // Attach shaders
-            self.gl.attach_shader(pid, vs_id);
-            self.gl.attach_shader(pid, fs_id);
-
-            // Bind vertex attributes
-            for (i, attr) in descriptor
-                .vertex_attributes
-                .iter()
-                .chain(descriptor.instance_attributes.iter())
-                .enumerate()
-            {
-                self.gl
-                    .bind_attrib_location(pid, i as gl::GLuint, attr.name);
-            }
-
-            if self.cached_programs.is_some() {
-                self.gl.program_parameter_i(pid, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
-            }
-
-            // Link!
-            self.gl.link_program(pid);
-
-            // GL recommends detaching and deleting shaders once the link
-            // is complete (whether successful or not). This allows the driver
-            // to free any memory associated with the parsing and compilation.
-            self.gl.detach_shader(pid, vs_id);
-            self.gl.detach_shader(pid, fs_id);
-            self.gl.delete_shader(vs_id);
-            self.gl.delete_shader(fs_id);
-
-            let mut link_status = [0];
-            unsafe {
-                self.gl.get_program_iv(pid, gl::LINK_STATUS, &mut link_status);
-            }
-            if link_status[0] == 0 {
-                let error_log = self.gl.get_program_info_log(pid);
-                error!(
-                    "Failed to link shader program: {}\n{}",
-                    base_filename,
-                    error_log
-                );
-                self.gl.delete_program(pid);
-                return Err(ShaderError::Link(base_filename.to_string(), error_log));
-            }
-        }
-
-        if let Some(ref cached_programs) = self.cached_programs {
-            if !cached_programs.binaries.borrow().contains_key(&sources) {
-                let (buffer, format) = self.gl.get_program_binary(pid);
-                if buffer.len() > 0 {
-                    let program_binary = Arc::new(ProgramBinary::new(buffer, format, &sources));
-                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
-                        program_cache_handler.notify_binary_added(&program_binary);
-                    }
-                    cached_programs.binaries.borrow_mut().insert(sources, program_binary);
-                }
-            }
-        }
-
-        let u_transform = self.gl.get_uniform_location(pid, "uTransform");
-        let u_device_pixel_ratio = self.gl.get_uniform_location(pid, "uDevicePixelRatio");
-        let u_mode = self.gl.get_uniform_location(pid, "uMode");
-
-        let program = Program {
-            id: pid,
-            u_transform,
-            u_device_pixel_ratio,
-            u_mode,
-        };
-
-        self.bind_program(&program);
-
-        Ok(program)
-    }
-
-    pub fn bind_shader_samplers<S>(&mut self, program: &Program, bindings: &[(&'static str, S)])
-    where
-        S: Into<TextureSlot> + Copy,
-    {
-        for binding in bindings {
-            let u_location = self.gl.get_uniform_location(program.id, binding.0);
-            if u_location != -1 {
-                self.bind_program(program);
-                self.gl
-                    .uniform_1i(u_location, binding.1.into().0 as gl::GLint);
-            }
-        }
-    }
-
-    #[cfg(feature = "debug_renderer")]
-    pub fn get_uniform_location(&self, program: &Program, name: &str) -> UniformLocation {
-        UniformLocation(self.gl.get_uniform_location(program.id, name))
-    }
-
-    pub fn set_uniforms(
-        &self,
-        program: &Program,
-        transform: &Transform3D<f32>,
-    ) {
-        debug_assert!(self.inside_frame);
-        self.gl
-            .uniform_matrix_4fv(program.u_transform, false, &transform.to_row_major_array());
-        self.gl
-            .uniform_1f(program.u_device_pixel_ratio, self.device_pixel_ratio);
-    }
-
-    pub fn switch_mode(&self, mode: i32) {
-        debug_assert!(self.inside_frame);
-        self.gl.uniform_1i(self.program_mode_id.0, mode);
-    }
-
-    pub fn create_pbo(&mut self) -> PBO {
-        let id = self.gl.gen_buffers(1)[0];
-        PBO { id }
-    }
-
-    pub fn delete_pbo(&mut self, mut pbo: PBO) {
-        self.gl.delete_buffers(&[pbo.id]);
-        pbo.id = 0;
-    }
-
-    pub fn upload_texture<'a, T>(
-        &'a mut self,
-        texture: &'a Texture,
-        pbo: &PBO,
-        upload_count: usize,
-    ) -> TextureUploader<'a, T> {
-        debug_assert!(self.inside_frame);
-        self.bind_texture(DEFAULT_TEXTURE, texture);
-
-        let buffer = match self.upload_method {
-            UploadMethod::Immediate => None,
-            UploadMethod::PixelBuffer(hint) => {
-                let upload_size = upload_count * mem::size_of::<T>();
-                self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
-                if upload_size != 0 {
-                    self.gl.buffer_data_untyped(
-                        gl::PIXEL_UNPACK_BUFFER,
-                        upload_size as _,
-                        ptr::null(),
-                        hint.to_gl(),
-                    );
-                }
-                Some(PixelBuffer::new(hint.to_gl(), upload_size))
-            },
-        };
-
-        TextureUploader {
-            target: UploadTarget {
-                gl: &*self.gl,
-                bgra_format: self.bgra_format,
-                texture,
-            },
-            buffer,
-            marker: PhantomData,
-        }
-    }
-
-    #[cfg(any(feature = "debug_renderer", feature = "capture"))]
-    pub fn read_pixels(&mut self, img_desc: &ImageDescriptor) -> Vec<u8> {
-        let desc = self.gl_describe_format(img_desc.format);
-        self.gl.read_pixels(
-            0, 0,
-            img_desc.size.width as i32,
-            img_desc.size.height as i32,
-            desc.external,
-            desc.pixel_type,
-        )
-    }
-
-    /// Read rectangle of pixels into the specified output slice.
-    pub fn read_pixels_into(
-        &mut self,
-        rect: DeviceUintRect,
-        format: ReadPixelsFormat,
-        output: &mut [u8],
-    ) {
-        let (bytes_per_pixel, desc) = match format {
-            ReadPixelsFormat::Standard(imf) => {
-                (imf.bytes_per_pixel(), self.gl_describe_format(imf))
-            }
-            ReadPixelsFormat::Rgba8 => {
-                (4, FormatDesc {
-                    external: gl::RGBA,
-                    internal: gl::RGBA8 as _,
-                    pixel_type: gl::UNSIGNED_BYTE,
-                })
-            }
-        };
-        let size_in_bytes = (bytes_per_pixel * rect.size.width * rect.size.height) as usize;
-        assert_eq!(output.len(), size_in_bytes);
-
-        self.gl.flush();
-        self.gl.read_pixels_into_buffer(
-            rect.origin.x as _,
-            rect.origin.y as _,
-            rect.size.width as _,
-            rect.size.height as _,
-            desc.external,
-            desc.pixel_type,
-            output,
-        );
-    }
-
-    /// Get texels of a texture into the specified output slice.
-    #[cfg(feature = "debug_renderer")]
-    pub fn get_tex_image_into(
-        &mut self,
-        texture: &Texture,
-        format: ImageFormat,
-        output: &mut [u8],
-    ) {
-        self.bind_texture(DEFAULT_TEXTURE, texture);
-        let desc = self.gl_describe_format(format);
-        self.gl.get_tex_image_into_buffer(
-            texture.target,
-            0,
-            desc.external,
-            desc.pixel_type,
-            output,
-        );
-    }
-
-    /// Attaches the provided texture to the current Read FBO binding.
-    #[cfg(any(feature = "debug_renderer", feature="capture"))]
-    fn attach_read_texture_raw(
-        &mut self, texture_id: gl::GLuint, target: gl::GLuint, layer_id: i32
-    ) {
-        match target {
-            gl::TEXTURE_2D_ARRAY => {
-                self.gl.framebuffer_texture_layer(
-                    gl::READ_FRAMEBUFFER,
-                    gl::COLOR_ATTACHMENT0,
-                    texture_id,
-                    0,
-                    layer_id,
-                )
-            }
-            _ => {
-                assert_eq!(layer_id, 0);
-                self.gl.framebuffer_texture_2d(
-                    gl::READ_FRAMEBUFFER,
-                    gl::COLOR_ATTACHMENT0,
-                    target,
-                    texture_id,
-                    0,
-                )
-            }
-        }
-    }
-
-    #[cfg(any(feature = "debug_renderer", feature="capture"))]
-    pub fn attach_read_texture_external(
-        &mut self, texture_id: gl::GLuint, target: TextureTarget, layer_id: i32
-    ) {
-        self.attach_read_texture_raw(texture_id, get_gl_target(target), layer_id)
-    }
-
-    #[cfg(any(feature = "debug_renderer", feature="capture"))]
-    pub fn attach_read_texture(&mut self, texture: &Texture, layer_id: i32) {
-        self.attach_read_texture_raw(texture.id, texture.target, layer_id)
-    }
-
-    fn bind_vao_impl(&mut self, id: gl::GLuint) {
-        debug_assert!(self.inside_frame);
-
-        if self.bound_vao != id {
-            self.bound_vao = id;
-            self.gl.bind_vertex_array(id);
-        }
-    }
-
-    pub fn bind_vao(&mut self, vao: &VAO) {
-        self.bind_vao_impl(vao.id)
-    }
-
-    pub fn bind_custom_vao(&mut self, vao: &CustomVAO) {
-        self.bind_vao_impl(vao.id)
-    }
-
-    fn create_vao_with_vbos(
-        &mut self,
-        descriptor: &VertexDescriptor,
-        main_vbo_id: VBOId,
-        instance_vbo_id: VBOId,
-        ibo_id: IBOId,
-        owns_vertices_and_indices: bool,
-    ) -> VAO {
-        debug_assert!(self.inside_frame);
-
-        let instance_stride = descriptor.instance_stride() as usize;
-        let vao_id = self.gl.gen_vertex_arrays(1)[0];
-
-        self.gl.bind_vertex_array(vao_id);
-
-        descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id);
-        ibo_id.bind(self.gl()); // force it to be a part of VAO
-
-        self.gl.bind_vertex_array(0);
-
-        VAO {
-            id: vao_id,
-            ibo_id,
-            main_vbo_id,
-            instance_vbo_id,
-            instance_stride,
-            owns_vertices_and_indices,
-        }
-    }
-
-    pub fn create_custom_vao(
-        &mut self,
-        streams: &[Stream],
-    ) -> CustomVAO {
-        debug_assert!(self.inside_frame);
-
-        let vao_id = self.gl.gen_vertex_arrays(1)[0];
-        self.gl.bind_vertex_array(vao_id);
-
-        let mut attrib_index = 0;
-        for stream in streams {
-            VertexDescriptor::bind_attributes(
-                stream.attributes,
-                attrib_index,
-                0,
-                self.gl(),
-                stream.vbo,
-            );
-            attrib_index += stream.attributes.len();
-        }
-
-        self.gl.bind_vertex_array(0);
-
-        CustomVAO {
-            id: vao_id,
-        }
-    }
-
-    pub fn delete_custom_vao(&mut self, mut vao: CustomVAO) {
-        self.gl.delete_vertex_arrays(&[vao.id]);
-        vao.id = 0;
-    }
-
-    pub fn create_vbo<T>(&mut self) -> VBO<T> {
-        let ids = self.gl.gen_buffers(1);
-        VBO {
-            id: ids[0],
-            target: gl::ARRAY_BUFFER,
-            allocated_count: 0,
-            marker: PhantomData,
-        }
-    }
-
-    pub fn delete_vbo<T>(&mut self, mut vbo: VBO<T>) {
-        self.gl.delete_buffers(&[vbo.id]);
-        vbo.id = 0;
-    }
-
-    pub fn create_vao(&mut self, descriptor: &VertexDescriptor) -> VAO {
-        debug_assert!(self.inside_frame);
-
-        let buffer_ids = self.gl.gen_buffers(3);
-        let ibo_id = IBOId(buffer_ids[0]);
-        let main_vbo_id = VBOId(buffer_ids[1]);
-        let intance_vbo_id = VBOId(buffer_ids[2]);
-
-        self.create_vao_with_vbos(descriptor, main_vbo_id, intance_vbo_id, ibo_id, true)
-    }
-
-    pub fn delete_vao(&mut self, mut vao: VAO) {
-        self.gl.delete_vertex_arrays(&[vao.id]);
-        vao.id = 0;
-
-        if vao.owns_vertices_and_indices {
-            self.gl.delete_buffers(&[vao.ibo_id.0]);
-            self.gl.delete_buffers(&[vao.main_vbo_id.0]);
-        }
-
-        self.gl.delete_buffers(&[vao.instance_vbo_id.0])
-    }
-
-    pub fn allocate_vbo<V>(
-        &mut self,
-        vbo: &mut VBO<V>,
-        count: usize,
-        usage_hint: VertexUsageHint,
-    ) {
-        debug_assert!(self.inside_frame);
-        vbo.allocated_count = count;
-
-        self.gl.bind_buffer(vbo.target, vbo.id);
-        self.gl.buffer_data_untyped(
-            vbo.target,
-            (count * mem::size_of::<V>()) as _,
-            ptr::null(),
-            usage_hint.to_gl(),
-        );
-    }
-
-    pub fn fill_vbo<V>(
-        &mut self,
-        vbo: &VBO<V>,
-        data: &[V],
-        offset: usize,
-    ) {
-        debug_assert!(self.inside_frame);
-        assert!(offset + data.len() <= vbo.allocated_count);
-        let stride = mem::size_of::<V>();
-
-        self.gl.bind_buffer(vbo.target, vbo.id);
-        self.gl.buffer_sub_data_untyped(
-            vbo.target,
-            (offset * stride) as _,
-            (data.len() * stride) as _,
-            data.as_ptr() as _,
-        );
-    }
-
-    fn update_vbo_data<V>(
-        &mut self,
-        vbo: VBOId,
-        vertices: &[V],
-        usage_hint: VertexUsageHint,
-    ) {
-        debug_assert!(self.inside_frame);
-
-        vbo.bind(self.gl());
-        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, vertices, usage_hint.to_gl());
-    }
-
-    pub fn create_vao_with_new_instances(
-        &mut self,
-        descriptor: &VertexDescriptor,
-        base_vao: &VAO,
-    ) -> VAO {
-        debug_assert!(self.inside_frame);
-
-        let buffer_ids = self.gl.gen_buffers(1);
-        let intance_vbo_id = VBOId(buffer_ids[0]);
-
-        self.create_vao_with_vbos(
-            descriptor,
-            base_vao.main_vbo_id,
-            intance_vbo_id,
-            base_vao.ibo_id,
-            false,
-        )
-    }
-
-    pub fn update_vao_main_vertices<V>(
-        &mut self,
-        vao: &VAO,
-        vertices: &[V],
-        usage_hint: VertexUsageHint,
-    ) {
-        debug_assert_eq!(self.bound_vao, vao.id);
-        self.update_vbo_data(vao.main_vbo_id, vertices, usage_hint)
-    }
-
-    pub fn update_vao_instances<V>(
-        &mut self,
-        vao: &VAO,
-        instances: &[V],
-        usage_hint: VertexUsageHint,
-    ) {
-        debug_assert_eq!(self.bound_vao, vao.id);
-        debug_assert_eq!(vao.instance_stride as usize, mem::size_of::<V>());
-
-        self.update_vbo_data(vao.instance_vbo_id, instances, usage_hint)
-    }
-
-    pub fn update_vao_indices<I>(&mut self, vao: &VAO, indices: &[I], usage_hint: VertexUsageHint) {
-        debug_assert!(self.inside_frame);
-        debug_assert_eq!(self.bound_vao, vao.id);
-
-        vao.ibo_id.bind(self.gl());
-        gl::buffer_data(
-            self.gl(),
-            gl::ELEMENT_ARRAY_BUFFER,
-            indices,
-            usage_hint.to_gl(),
-        );
-    }
-
-    pub fn draw_triangles_u16(&mut self, first_vertex: i32, index_count: i32) {
-        debug_assert!(self.inside_frame);
-        self.gl.draw_elements(
-            gl::TRIANGLES,
-            index_count,
-            gl::UNSIGNED_SHORT,
-            first_vertex as u32 * 2,
-        );
-    }
-
-    #[cfg(feature = "debug_renderer")]
-    pub fn draw_triangles_u32(&mut self, first_vertex: i32, index_count: i32) {
-        debug_assert!(self.inside_frame);
-        self.gl.draw_elements(
-            gl::TRIANGLES,
-            index_count,
-            gl::UNSIGNED_INT,
-            first_vertex as u32 * 4,
-        );
-    }
-
-    pub fn draw_nonindexed_points(&mut self, first_vertex: i32, vertex_count: i32) {
-        debug_assert!(self.inside_frame);
-        self.gl.draw_arrays(gl::POINTS, first_vertex, vertex_count);
-    }
-
-    #[cfg(feature = "debug_renderer")]
-    pub fn draw_nonindexed_lines(&mut self, first_vertex: i32, vertex_count: i32) {
-        debug_assert!(self.inside_frame);
-        self.gl.draw_arrays(gl::LINES, first_vertex, vertex_count);
-    }
-
-    pub fn draw_indexed_triangles_instanced_u16(&mut self, index_count: i32, instance_count: i32) {
-        debug_assert!(self.inside_frame);
-        self.gl.draw_elements_instanced(
-            gl::TRIANGLES,
-            index_count,
-            gl::UNSIGNED_SHORT,
-            0,
-            instance_count,
-        );
-    }
-
-    pub fn end_frame(&mut self) {
-        self.bind_draw_target(None, None);
-        self.bind_read_target(None);
-
-        debug_assert!(self.inside_frame);
-        self.inside_frame = false;
-
-        self.gl.bind_texture(gl::TEXTURE_2D, 0);
-        self.gl.use_program(0);
-
-        for i in 0 .. self.bound_textures.len() {
-            self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint);
-            self.gl.bind_texture(gl::TEXTURE_2D, 0);
-        }
-
-        self.gl.active_texture(gl::TEXTURE0);
-
-        self.frame_id.0 += 1;
-    }
-
-    pub fn clear_target(
-        &self,
-        color: Option<[f32; 4]>,
-        depth: Option<f32>,
-        rect: Option<DeviceIntRect>,
-    ) {
-        let mut clear_bits = 0;
-
-        if let Some(color) = color {
-            self.gl.clear_color(color[0], color[1], color[2], color[3]);
-            clear_bits |= gl::COLOR_BUFFER_BIT;
-        }
-
-        if let Some(depth) = depth {
-            if cfg!(debug_assertions) {
-                let mut mask = [0];
-                unsafe {
-                    self.gl.get_boolean_v(gl::DEPTH_WRITEMASK, &mut mask);
-                }
-                assert_ne!(mask[0], 0);
-            }
-            self.gl.clear_depth(depth as f64);
-            clear_bits |= gl::DEPTH_BUFFER_BIT;
-        }
-
-        if clear_bits != 0 {
-            match rect {
-                Some(rect) => {
-                    self.gl.enable(gl::SCISSOR_TEST);
-                    self.gl.scissor(
-                        rect.origin.x,
-                        rect.origin.y,
-                        rect.size.width,
-                        rect.size.height,
-                    );
-                    self.gl.clear(clear_bits);
-                    self.gl.disable(gl::SCISSOR_TEST);
-                }
-                None => {
-                    self.gl.clear(clear_bits);
-                }
-            }
-        }
-    }
-
-    pub fn enable_depth(&self) {
-        self.gl.enable(gl::DEPTH_TEST);
-    }
-
-    pub fn disable_depth(&self) {
-        self.gl.disable(gl::DEPTH_TEST);
-    }
-
-    pub fn set_depth_func(&self, depth_func: DepthFunction) {
-        self.gl.depth_func(depth_func as gl::GLuint);
-    }
-
-    pub fn enable_depth_write(&self) {
-        self.gl.depth_mask(true);
-    }
-
-    pub fn disable_depth_write(&self) {
-        self.gl.depth_mask(false);
-    }
-
-    pub fn disable_stencil(&self) {
-        self.gl.disable(gl::STENCIL_TEST);
-    }
-
-    pub fn set_scissor_rect(&self, rect: DeviceIntRect) {
-        self.gl.scissor(
-            rect.origin.x,
-            rect.origin.y,
-            rect.size.width,
-            rect.size.height,
-        );
-    }
-
-    pub fn enable_scissor(&self) {
-        self.gl.enable(gl::SCISSOR_TEST);
-    }
-
-    pub fn disable_scissor(&self) {
-        self.gl.disable(gl::SCISSOR_TEST);
-    }
-
-    pub fn set_blend(&self, enable: bool) {
-        if enable {
-            self.gl.enable(gl::BLEND);
-        } else {
-            self.gl.disable(gl::BLEND);
-        }
-    }
-
-    pub fn set_blend_mode_alpha(&self) {
-        self.gl.blend_func_separate(gl::SRC_ALPHA, gl::ONE_MINUS_SRC_ALPHA,
-                                    gl::ONE, gl::ONE);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-
-    pub fn set_blend_mode_premultiplied_alpha(&self) {
-        self.gl.blend_func(gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-
-    pub fn set_blend_mode_premultiplied_dest_out(&self) {
-        self.gl.blend_func(gl::ZERO, gl::ONE_MINUS_SRC_ALPHA);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-
-    pub fn set_blend_mode_multiply(&self) {
-        self.gl
-            .blend_func_separate(gl::ZERO, gl::SRC_COLOR, gl::ZERO, gl::SRC_ALPHA);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_max(&self) {
-        self.gl
-            .blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE);
-        self.gl.blend_equation_separate(gl::MAX, gl::FUNC_ADD);
-    }
-    #[cfg(feature = "debug_renderer")]
-    pub fn set_blend_mode_min(&self) {
-        self.gl
-            .blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE);
-        self.gl.blend_equation_separate(gl::MIN, gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_pass0(&self) {
-        self.gl.blend_func(gl::ZERO, gl::ONE_MINUS_SRC_COLOR);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_pass1(&self) {
-        self.gl.blend_func(gl::ONE, gl::ONE);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_with_bg_color_pass0(&self) {
-        self.gl.blend_func_separate(gl::ZERO, gl::ONE_MINUS_SRC_COLOR, gl::ZERO, gl::ONE);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_with_bg_color_pass1(&self) {
-        self.gl.blend_func_separate(gl::ONE_MINUS_DST_ALPHA, gl::ONE, gl::ZERO, gl::ONE);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_with_bg_color_pass2(&self) {
-        self.gl.blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_constant_text_color(&self, color: ColorF) {
-        // color is an unpremultiplied color.
-        self.gl.blend_color(color.r, color.g, color.b, 1.0);
-        self.gl
-            .blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-    pub fn set_blend_mode_subpixel_dual_source(&self) {
-        self.gl.blend_func(gl::ONE, gl::ONE_MINUS_SRC1_COLOR);
-        self.gl.blend_equation(gl::FUNC_ADD);
-    }
-
-    pub fn supports_extension(&self, extension: &str) -> bool {
-        supports_extension(&self.extensions, extension)
-    }
-
-    pub fn echo_driver_messages(&self) {
-        for msg in self.gl.get_debug_messages() {
-            let level = match msg.severity {
-                gl::DEBUG_SEVERITY_HIGH => Level::Error,
-                gl::DEBUG_SEVERITY_MEDIUM => Level::Warn,
-                gl::DEBUG_SEVERITY_LOW => Level::Info,
-                gl::DEBUG_SEVERITY_NOTIFICATION => Level::Debug,
-                _ => Level::Trace,
-            };
-            let ty = match msg.ty {
-                gl::DEBUG_TYPE_ERROR => "error",
-                gl::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "deprecated",
-                gl::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "undefined",
-                gl::DEBUG_TYPE_PORTABILITY => "portability",
-                gl::DEBUG_TYPE_PERFORMANCE => "perf",
-                gl::DEBUG_TYPE_MARKER => "marker",
-                gl::DEBUG_TYPE_PUSH_GROUP => "group push",
-                gl::DEBUG_TYPE_POP_GROUP => "group pop",
-                gl::DEBUG_TYPE_OTHER => "other",
-                _ => "?",
-            };
-            log!(level, "({}) {}", ty, msg.message);
-        }
-    }
-
-    fn gl_describe_format(&self, format: ImageFormat) -> FormatDesc {
-        match format {
-            ImageFormat::R8 => FormatDesc {
-                internal: gl::RED as _,
-                external: gl::RED,
-                pixel_type: gl::UNSIGNED_BYTE,
-            },
-            ImageFormat::BGRA8 => {
-                let external = self.bgra_format;
-                FormatDesc {
-                    internal: match self.gl.get_type() {
-                        gl::GlType::Gl => gl::RGBA as _,
-                        gl::GlType::Gles => external as _,
-                    },
-                    external,
-                    pixel_type: gl::UNSIGNED_BYTE,
-                }
-            },
-            ImageFormat::RGBAF32 => FormatDesc {
-                internal: gl::RGBA32F as _,
-                external: gl::RGBA,
-                pixel_type: gl::FLOAT,
-            },
-            ImageFormat::RG8 => FormatDesc {
-                internal: gl::RG8 as _,
-                external: gl::RG,
-                pixel_type: gl::UNSIGNED_BYTE,
-            },
-        }
-    }
-}
-
-struct FormatDesc {
-    internal: gl::GLint,
-    external: gl::GLuint,
-    pixel_type: gl::GLuint,
-}
-
-struct UploadChunk {
-    rect: DeviceUintRect,
-    layer_index: i32,
-    stride: Option<u32>,
-    offset: usize,
-}
-
-struct PixelBuffer {
-    usage: gl::GLenum,
-    size_allocated: usize,
-    size_used: usize,
-    // small vector avoids heap allocation for a single chunk
-    chunks: SmallVec<[UploadChunk; 1]>,
-}
-
-impl PixelBuffer {
-    fn new(
-        usage: gl::GLenum,
-        size_allocated: usize,
-    ) -> Self {
-        PixelBuffer {
-            usage,
-            size_allocated,
-            size_used: 0,
-            chunks: SmallVec::new(),
-        }
-    }
-}
-
-struct UploadTarget<'a> {
-    gl: &'a gl::Gl,
-    bgra_format: gl::GLuint,
-    texture: &'a Texture,
-}
-
-pub struct TextureUploader<'a, T> {
-    target: UploadTarget<'a>,
-    buffer: Option<PixelBuffer>,
-    marker: PhantomData<T>,
-}
-
-impl<'a, T> Drop for TextureUploader<'a, T> {
-    fn drop(&mut self) {
-        if let Some(buffer) = self.buffer.take() {
-            for chunk in buffer.chunks {
-                self.target.update_impl(chunk);
-            }
-            self.target.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
-        }
-    }
-}
-
-impl<'a, T> TextureUploader<'a, T> {
-    pub fn upload(
-        &mut self,
-        rect: DeviceUintRect,
-        layer_index: i32,
-        stride: Option<u32>,
-        data: &[T],
-    ) {
-        match self.buffer {
-            Some(ref mut buffer) => {
-                let upload_size = mem::size_of::<T>() * data.len();
-                if buffer.size_used + upload_size > buffer.size_allocated {
-                    // flush
-                    for chunk in buffer.chunks.drain() {
-                        self.target.update_impl(chunk);
-                    }
-                    buffer.size_used = 0;
-                }
-
-                if upload_size > buffer.size_allocated {
-                    gl::buffer_data(
-                        self.target.gl,
-                        gl::PIXEL_UNPACK_BUFFER,
-                        data,
-                        buffer.usage,
-                    );
-                    buffer.size_allocated = upload_size;
-                } else {
-                    gl::buffer_sub_data(
-                        self.target.gl,
-                        gl::PIXEL_UNPACK_BUFFER,
-                        buffer.size_used as _,
-                        data,
-                    );
-                }
-
-                buffer.chunks.push(UploadChunk {
-                    rect, layer_index, stride,
-                    offset: buffer.size_used,
-                });
-                buffer.size_used += upload_size;
-            }
-            None => {
-                self.target.update_impl(UploadChunk {
-                    rect, layer_index, stride,
-                    offset: data.as_ptr() as _,
-                });
-            }
-        }
-    }
-}
-
-impl<'a> UploadTarget<'a> {
-    fn update_impl(&mut self, chunk: UploadChunk) {
-        let (gl_format, bpp, data_type) = match self.texture.format {
-            ImageFormat::R8 => (gl::RED, 1, gl::UNSIGNED_BYTE),
-            ImageFormat::BGRA8 => (self.bgra_format, 4, gl::UNSIGNED_BYTE),
-            ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
-            ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
-        };
-
-        let row_length = match chunk.stride {
-            Some(value) => value / bpp,
-            None => self.texture.width,
-        };
-
-        if chunk.stride.is_some() {
-            self.gl.pixel_store_i(
-                gl::UNPACK_ROW_LENGTH,
-                row_length as _,
-            );
-        }
-
-        let pos = chunk.rect.origin;
-        let size = chunk.rect.size;
-
-        match self.texture.target {
-            gl::TEXTURE_2D_ARRAY => {
-                self.gl.tex_sub_image_3d_pbo(
-                    self.texture.target,
-                    0,
-                    pos.x as _,
-                    pos.y as _,
-                    chunk.layer_index,
-                    size.width as _,
-                    size.height as _,
-                    1,
-                    gl_format,
-                    data_type,
-                    chunk.offset,
-                );
-            }
-            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
-                self.gl.tex_sub_image_2d_pbo(
-                    self.texture.target,
-                    0,
-                    pos.x as _,
-                    pos.y as _,
-                    size.width as _,
-                    size.height as _,
-                    gl_format,
-                    data_type,
-                    chunk.offset,
-                );
-            }
-            _ => panic!("BUG: Unexpected texture target!"),
-        }
-
-        // If using tri-linear filtering, build the mip-map chain for this texture.
-        if self.texture.filter == TextureFilter::Trilinear {
-            self.gl.generate_mipmap(self.texture.target);
-        }
-
-        // Reset row length to 0, otherwise the stride would apply to all texture uploads.
-        if chunk.stride.is_some() {
-            self.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
-        }
-    }
-}
-
-fn texels_to_u8_slice<T: Texel>(texels: &[T]) -> &[u8] {
-    unsafe {
-        slice::from_raw_parts(texels.as_ptr() as *const u8, texels.len() * mem::size_of::<T>())
-    }
-}
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/device/gl.rs
@@ -0,0 +1,2434 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use super::super::shader_source;
+use api::{ColorF, ImageFormat};
+use api::{DeviceIntPoint, DeviceIntRect, DeviceUintRect, DeviceUintSize};
+use api::TextureTarget;
+#[cfg(any(feature = "debug_renderer", feature="capture"))]
+use api::ImageDescriptor;
+use euclid::Transform3D;
+use gleam::gl;
+use internal_types::{FastHashMap, RenderTargetInfo};
+use log::Level;
+use smallvec::SmallVec;
+use std::cell::RefCell;
+use std::fs::File;
+use std::io::Read;
+use std::marker::PhantomData;
+use std::mem;
+use std::ops::Add;
+use std::path::PathBuf;
+use std::ptr;
+use std::rc::Rc;
+use std::slice;
+use std::sync::Arc;
+use std::thread;
+
+#[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FrameId(usize);
+
+impl FrameId {
+    pub fn new(value: usize) -> Self {
+        FrameId(value)
+    }
+}
+
+impl Add<usize> for FrameId {
+    type Output = FrameId;
+
+    fn add(self, other: usize) -> FrameId {
+        FrameId(self.0 + other)
+    }
+}
+
+const GL_FORMAT_RGBA: gl::GLuint = gl::RGBA;
+
+const GL_FORMAT_BGRA_GL: gl::GLuint = gl::BGRA;
+
+const GL_FORMAT_BGRA_GLES: gl::GLuint = gl::BGRA_EXT;
+
+const SHADER_VERSION_GL: &str = "#version 150\n";
+const SHADER_VERSION_GLES: &str = "#version 300 es\n";
+
+const SHADER_KIND_VERTEX: &str = "#define WR_VERTEX_SHADER\n";
+const SHADER_KIND_FRAGMENT: &str = "#define WR_FRAGMENT_SHADER\n";
+const SHADER_IMPORT: &str = "#include ";
+
+pub struct TextureSlot(pub usize);
+
+// In some places we need to temporarily bind a texture to any slot.
+const DEFAULT_TEXTURE: TextureSlot = TextureSlot(0);
+
+#[repr(u32)]
+pub enum DepthFunction {
+    #[cfg(feature = "debug_renderer")]
+    Less = gl::LESS,
+    LessEqual = gl::LEQUAL,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum TextureFilter {
+    Nearest,
+    Linear,
+    Trilinear,
+}
+
+#[derive(Debug)]
+pub enum VertexAttributeKind {
+    F32,
+    #[cfg(feature = "debug_renderer")]
+    U8Norm,
+    U16Norm,
+    I32,
+    U16,
+}
+
+#[derive(Debug)]
+pub struct VertexAttribute {
+    pub name: &'static str,
+    pub count: u32,
+    pub kind: VertexAttributeKind,
+}
+
+#[derive(Debug)]
+pub struct VertexDescriptor {
+    pub vertex_attributes: &'static [VertexAttribute],
+    pub instance_attributes: &'static [VertexAttribute],
+}
+
+enum FBOTarget {
+    Read,
+    Draw,
+}
+
+/// Method of uploading texel data from CPU to GPU.
+#[derive(Debug, Clone)]
+pub enum UploadMethod {
+    /// Just call `glTexSubImage` directly with the CPU data pointer.
+    Immediate,
+    /// Accumulate the changes in a PBO first before transferring to a texture.
+    PixelBuffer(VertexUsageHint),
+}
+
+/// Plain old data that can be used to initialize a texture.
+pub unsafe trait Texel: Copy {}
+unsafe impl Texel for u8 {}
+unsafe impl Texel for f32 {}
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum ReadPixelsFormat {
+    Standard(ImageFormat),
+    Rgba8,
+}
+
+pub fn get_gl_target(target: TextureTarget) -> gl::GLuint {
+    match target {
+        TextureTarget::Default => gl::TEXTURE_2D,
+        TextureTarget::Array => gl::TEXTURE_2D_ARRAY,
+        TextureTarget::Rect => gl::TEXTURE_RECTANGLE,
+        TextureTarget::External => gl::TEXTURE_EXTERNAL_OES,
+    }
+}
+
+fn supports_extension(extensions: &[String], extension: &str) -> bool {
+    extensions.iter().any(|s| s == extension)
+}
+
+fn get_shader_version(gl: &gl::Gl) -> &'static str {
+    match gl.get_type() {
+        gl::GlType::Gl => SHADER_VERSION_GL,
+        gl::GlType::Gles => SHADER_VERSION_GLES,
+    }
+}
+
+// Get a shader string by name. If an override path is supplied and contains
+// `<shader_name>.glsl`, that file is used; otherwise the built-in resources are.
+fn get_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<String> {
+    if let Some(ref base) = *base_path {
+        let shader_path = base.join(&format!("{}.glsl", shader_name));
+        if shader_path.exists() {
+            let mut source = String::new();
+            File::open(&shader_path)
+                .unwrap() // panics if the override file cannot be opened
+                .read_to_string(&mut source)
+                .unwrap(); // panics on a read error or non-UTF-8 contents
+            return Some(source);
+        }
+    }
+
+    shader_source::SHADERS
+        .get(shader_name)
+        .map(|s| s.to_string())
+}
+
+// Parse a shader string for imports. Each `#include` line is replaced, in place,
+// by the recursively expanded source of the comma-separated shaders it names.
+fn parse_shader_source(source: String, base_path: &Option<PathBuf>, output: &mut String) {
+    for line in source.lines() {
+        if line.starts_with(SHADER_IMPORT) {
+            let imports = line[SHADER_IMPORT.len() ..].split(',');
+
+            // For each import, get the source, and recurse; unresolved names are skipped.
+            for import in imports {
+                if let Some(include) = get_shader_source(import, base_path) {
+                    parse_shader_source(include, base_path, output);
+                }
+            }
+        } else {
+            output.push_str(line);
+            output.push_str("\n");
+        }
+    }
+}
+
+pub fn build_shader_strings(
+    gl_version_string: &str,
+    features: &str,
+    base_filename: &str,
+    override_path: &Option<PathBuf>,
+) -> (String, String) {
+    // Build the vertex and fragment shader sources; returned as (vs, fs).
+    let mut vs_source = String::new();
+    let mut fs_source = String::new();
+
+    // GLSL requires that the version number comes first.
+    vs_source.push_str(gl_version_string);
+    fs_source.push_str(gl_version_string);
+
+    // Insert the shader name to make debugging easier.
+    let name_string = format!("// {}\n", base_filename);
+    vs_source.push_str(&name_string);
+    fs_source.push_str(&name_string);
+
+    // Define a constant depending on whether we are compiling VS or FS.
+    vs_source.push_str(SHADER_KIND_VERTEX);
+    fs_source.push_str(SHADER_KIND_FRAGMENT);
+
+    // Add any defines that were passed by the caller.
+    vs_source.push_str(features);
+    fs_source.push_str(features);
+
+    // Parse the main .glsl file, expanding any imports. The same expanded text
+    // is appended to both sources; it stays empty if the file is not found.
+    let mut shared_result = String::new();
+    if let Some(shared_source) = get_shader_source(base_filename, override_path) {
+        parse_shader_source(shared_source, override_path, &mut shared_result);
+    }
+
+    vs_source.push_str(&shared_result);
+    fs_source.push_str(&shared_result);
+
+    (vs_source, fs_source)
+}
+
+pub trait FileWatcherHandler: Send {
+    fn file_changed(&self, path: PathBuf);
+}
+
+impl VertexAttributeKind {
+    fn size_in_bytes(&self) -> u32 {
+        match *self {
+            VertexAttributeKind::F32 => 4,
+            #[cfg(feature = "debug_renderer")]
+            VertexAttributeKind::U8Norm => 1,
+            VertexAttributeKind::U16Norm => 2,
+            VertexAttributeKind::I32 => 4,
+            VertexAttributeKind::U16 => 2,
+        }
+    }
+}
+
+impl VertexAttribute {
+    fn size_in_bytes(&self) -> u32 {
+        self.count * self.kind.size_in_bytes()
+    }
+
+    fn bind_to_vao(
+        &self,
+        attr_index: gl::GLuint,
+        divisor: gl::GLuint,
+        stride: gl::GLint,
+        offset: gl::GLuint,
+        gl: &gl::Gl,
+    ) {
+        gl.enable_vertex_attrib_array(attr_index);
+        gl.vertex_attrib_divisor(attr_index, divisor);
+
+        match self.kind {
+            VertexAttributeKind::F32 => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::FLOAT,
+                    false, // not normalized
+                    stride,
+                    offset,
+                );
+            }
+            #[cfg(feature = "debug_renderer")]
+            VertexAttributeKind::U8Norm => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_BYTE,
+                    true, // normalized
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::U16Norm => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_SHORT,
+                    true, // normalized
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::I32 => {
+                gl.vertex_attrib_i_pointer( // integer attribute, no float conversion
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::INT,
+                    stride,
+                    offset,
+                );
+            }
+            VertexAttributeKind::U16 => {
+                gl.vertex_attrib_i_pointer( // integer attribute, no float conversion
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_SHORT,
+                    stride,
+                    offset,
+                );
+            }
+        }
+    }
+}
+
+impl VertexDescriptor {
+    fn instance_stride(&self) -> u32 {
+        self.instance_attributes
+            .iter()
+            .map(|attr| attr.size_in_bytes())
+            .sum()
+    }
+
+    fn bind_attributes(
+        attributes: &[VertexAttribute],
+        start_index: usize,
+        divisor: u32,
+        gl: &gl::Gl,
+        vbo: VBOId,
+    ) {
+        vbo.bind(gl);
+
+        let stride: u32 = attributes // total byte size of all attributes
+            .iter()
+            .map(|attr| attr.size_in_bytes())
+            .sum();
+
+        let mut offset = 0; // running byte offset of the current attribute
+        for (i, attr) in attributes.iter().enumerate() {
+            let attr_index = (start_index + i) as gl::GLuint;
+            attr.bind_to_vao(attr_index, divisor, stride as _, offset, gl);
+            offset += attr.size_in_bytes();
+        }
+    }
+
+    fn bind(&self, gl: &gl::Gl, main: VBOId, instance: VBOId) {
+        Self::bind_attributes(self.vertex_attributes, 0, 0, gl, main); // divisor 0: per vertex
+
+        if !self.instance_attributes.is_empty() {
+            Self::bind_attributes(
+                self.instance_attributes,
+                self.vertex_attributes.len(), // indices continue after the vertex attributes
+                1, gl, instance, // divisor 1: advance once per instance
+            );
+        }
+    }
+}
+
+impl VBOId {
+    fn bind(&self, gl: &gl::Gl) {
+        gl.bind_buffer(gl::ARRAY_BUFFER, self.0);
+    }
+}
+
+impl IBOId {
+    fn bind(&self, gl: &gl::Gl) {
+        gl.bind_buffer(gl::ELEMENT_ARRAY_BUFFER, self.0);
+    }
+}
+
+impl FBOId {
+    fn bind(&self, gl: &gl::Gl, target: FBOTarget) {
+        let target = match target {
+            FBOTarget::Read => gl::READ_FRAMEBUFFER,
+            FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
+        };
+        gl.bind_framebuffer(target, self.0);
+    }
+}
+
+pub struct Stream<'a> {
+    attributes: &'a [VertexAttribute],
+    vbo: VBOId,
+}
+
+pub struct VBO<V> {
+    id: gl::GLuint,
+    target: gl::GLenum,
+    allocated_count: usize,
+    marker: PhantomData<V>,
+}
+
+impl<V> VBO<V> {
+    pub fn allocated_count(&self) -> usize {
+        self.allocated_count
+    }
+
+    pub fn stream_with<'a>(&self, attributes: &'a [VertexAttribute]) -> Stream<'a> {
+        debug_assert_eq!(
+            mem::size_of::<V>(),
+            attributes.iter().map(|a| a.size_in_bytes() as usize).sum::<usize>()
+        );
+        Stream {
+            attributes,
+            vbo: VBOId(self.id),
+        }
+    }
+}
+
+impl<T> Drop for VBO<T> {
+    fn drop(&mut self) {
+        debug_assert!(thread::panicking() || self.id == 0);
+    }
+}
+
+#[cfg_attr(feature = "replay", derive(Clone))]
+pub struct ExternalTexture {
+    id: gl::GLuint,
+    target: gl::GLuint,
+}
+
+impl ExternalTexture {
+    pub fn new(id: u32, target: TextureTarget) -> Self {
+        ExternalTexture {
+            id,
+            target: get_gl_target(target),
+        }
+    }
+
+    #[cfg(feature = "replay")]
+    pub fn internal_id(&self) -> gl::GLuint {
+        self.id
+    }
+}
+
+/// A GL texture together with the metadata the device tracks for it.
+pub struct Texture {
+    /// GL name of the texture; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    target: gl::GLuint,
+    layer_count: i32,
+    format: ImageFormat,
+    width: u32,
+    height: u32,
+    filter: TextureFilter,
+    render_target: Option<RenderTargetInfo>,
+    /// Framebuffer objects used when the texture is a render target.
+    fbo_ids: Vec<FBOId>,
+    /// Depth renderbuffer, if one is attached.
+    depth_rb: Option<RBOId>,
+    last_frame_used: FrameId,
+}
+
+impl Texture {
+    /// Returns the stored width and height as a `DeviceUintSize`.
+    pub fn get_dimensions(&self) -> DeviceUintSize {
+        let (width, height) = (self.width, self.height);
+        DeviceUintSize::new(width, height)
+    }
+
+    /// Returns the number of framebuffer objects held for this texture.
+    pub fn get_render_target_layer_count(&self) -> usize {
+        let framebuffers = &self.fbo_ids;
+        framebuffers.len()
+    }
+
+    /// Returns the stored layer count.
+    pub fn get_layer_count(&self) -> i32 {
+        self.layer_count
+    }
+
+    /// Returns the image format the texture was created with.
+    pub fn get_format(&self) -> ImageFormat {
+        self.format
+    }
+
+    /// Returns the stored sampling filter.
+    #[cfg(any(feature = "debug_renderer", feature = "capture"))]
+    pub fn get_filter(&self) -> TextureFilter {
+        self.filter
+    }
+
+    /// Returns a copy of the render target info, if any.
+    #[cfg(any(feature = "debug_renderer", feature = "capture"))]
+    pub fn get_render_target(&self) -> Option<RenderTargetInfo> {
+        self.render_target.clone()
+    }
+
+    /// Returns `true` when a depth renderbuffer is attached.
+    pub fn has_depth(&self) -> bool {
+        match self.depth_rb {
+            Some(_) => true,
+            None => false,
+        }
+    }
+
+    /// Borrows the render target info, if any.
+    pub fn get_rt_info(&self) -> Option<&RenderTargetInfo> {
+        self.render_target.as_ref()
+    }
+
+    /// Returns `true` when `frame_id` equals the stored `last_frame_used`.
+    pub fn used_in_frame(&self, frame_id: FrameId) -> bool {
+        self.last_frame_used == frame_id
+    }
+
+    /// Converts the texture into an `ExternalTexture` with the same GL name
+    /// and target.
+    #[cfg(feature = "replay")]
+    pub fn into_external(mut self) -> ExternalTexture {
+        // Zero the id while taking it so the `Drop` check does not complain
+        // about a name that has been moved out.
+        let id = mem::replace(&mut self.id, 0);
+        ExternalTexture {
+            id,
+            target: self.target,
+        }
+    }
+}
+
+impl Drop for Texture {
+    /// Debug-only check that the GL name was cleared before the value is
+    /// dropped; the check is waived while the thread is panicking.
+    fn drop(&mut self) {
+        debug_assert!(thread::panicking() || self.id == 0);
+    }
+}
+
+/// A GL program object plus a few cached integer handles.
+pub struct Program {
+    /// GL name of the program; expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    // presumably uniform locations; `u_mode` is wrapped in a `UniformLocation`
+    // by `Device::bind_program`.
+    u_transform: gl::GLint,
+    u_device_pixel_ratio: gl::GLint,
+    u_mode: gl::GLint,
+}
+
+impl Drop for Program {
+    /// Debug-only check that the GL name was cleared before the value is
+    /// dropped; the check is waived while the thread is panicking.
+    fn drop(&mut self) {
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// Holds a single GL name (presumably a vertex array object, going by the
+/// type name).
+pub struct CustomVAO {
+    /// Expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+}
+
+impl Drop for CustomVAO {
+    /// Debug-only check that the GL name was cleared before the value is
+    /// dropped; the check is waived while the thread is panicking.
+    fn drop(&mut self) {
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// A GL name together with the ids of the index, main vertex and instance
+/// buffers associated with it.
+pub struct VAO {
+    /// Expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+    ibo_id: IBOId,
+    main_vbo_id: VBOId,
+    instance_vbo_id: VBOId,
+    instance_stride: usize,
+    owns_vertices_and_indices: bool,
+}
+
+impl Drop for VAO {
+    /// Debug-only check that the GL name was cleared before the value is
+    /// dropped; the check is waived while the thread is panicking.
+    fn drop(&mut self) {
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// Holds a single GL name (presumably a pixel buffer object, going by the
+/// type name).
+pub struct PBO {
+    /// Expected to be 0 by the time the value is dropped.
+    id: gl::GLuint,
+}
+
+impl Drop for PBO {
+    /// Debug-only check that the GL name was cleared before the value is
+    /// dropped; the check is waived while the thread is panicking.
+    fn drop(&mut self) {
+        debug_assert!(
+            self.id == 0 || thread::panicking(),
+            "renderer::deinit not called"
+        );
+    }
+}
+
+/// Newtype around the GL name of a framebuffer object.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+pub struct FBOId(gl::GLuint);
+
+/// Newtype around the GL name of a renderbuffer object.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+pub struct RBOId(gl::GLuint);
+
+/// Newtype around the GL name of a buffer bound as `gl::ARRAY_BUFFER`.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+pub struct VBOId(gl::GLuint);
+
+/// Newtype around the GL name of a buffer bound as `gl::ELEMENT_ARRAY_BUFFER`.
+#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
+struct IBOId(gl::GLuint);
+
+/// The renderer name plus the vertex and fragment shader sources of a
+/// program. Used as the key of `ProgramCache::binaries`.
+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
+#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
+pub struct ProgramSources {
+    renderer_name: String,
+    vs_source: String,
+    fs_source: String,
+}
+
+impl ProgramSources {
+    /// Bundles the three strings into a `ProgramSources`.
+    fn new(renderer_name: String, vs_source: String, fs_source: String) -> Self {
+        Self { renderer_name, vs_source, fs_source }
+    }
+}
+
+/// A program binary blob and its GL binary format. With the
+/// `serialize_program` feature it also carries the sources it was built from.
+#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
+pub struct ProgramBinary {
+    binary: Vec<u8>,
+    format: gl::GLenum,
+    #[cfg(feature = "serialize_program")]
+    sources: ProgramSources,
+}
+
+impl ProgramBinary {
+    /// Creates a `ProgramBinary`. `sources` is cloned into the result only
+    /// when the `serialize_program` feature is enabled; otherwise it is unused.
+    #[allow(unused_variables)]
+    fn new(
+        binary: Vec<u8>,
+        format: gl::GLenum,
+        sources: &ProgramSources,
+    ) -> Self {
+        Self {
+            binary,
+            format,
+            #[cfg(feature = "serialize_program")]
+            sources: sources.clone(),
+        }
+    }
+}
+
+/// The interface an application can implement to be notified about
+/// `ProgramCache` updates.
+pub trait ProgramCacheObserver {
+    /// Presumably called when a new binary is added to the cache; the call
+    /// site is not in this part of the file.
+    fn notify_binary_added(&self, program_binary: &Arc<ProgramBinary>);
+    /// Called by `Device::create_program` when a cached binary was loaded into
+    /// a program object but the program's link status reports failure.
+    fn notify_program_binary_failed(&self, program_binary: &Arc<ProgramBinary>);
+}
+
+/// Maps program sources to their shared program binaries.
+pub struct ProgramCache {
+    binaries: RefCell<FastHashMap<ProgramSources, Arc<ProgramBinary>>>,
+
+    /// Optional trait object that allows the client
+    /// application to handle ProgramCache updating
+    program_cache_handler: Option<Box<ProgramCacheObserver>>,
+}
+
+impl ProgramCache {
+    /// Creates an empty cache with the optional observer and returns it
+    /// wrapped in an `Rc`.
+    pub fn new(program_cache_observer: Option<Box<ProgramCacheObserver>>) -> Rc<Self> {
+        let cache = ProgramCache {
+            binaries: RefCell::new(FastHashMap::default()),
+            program_cache_handler: program_cache_observer,
+        };
+        Rc::new(cache)
+    }
+
+    /// Load ProgramBinary to ProgramCache.
+    /// The function is typically used to load ProgramBinary from disk.
+    #[cfg(feature = "serialize_program")]
+    pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) {
+        // The binary carries its own sources; a clone of them becomes the key.
+        let key = program_binary.sources.clone();
+        let mut binaries = self.binaries.borrow_mut();
+        binaries.insert(key, program_binary);
+    }
+}
+
+/// Usage hint for vertex data, mapped to a GL `*_DRAW` constant by `to_gl`.
+#[derive(Debug, Copy, Clone)]
+pub enum VertexUsageHint {
+    Static,
+    Dynamic,
+    Stream,
+}
+
+impl VertexUsageHint {
+    /// Returns the GL usage constant for this hint: `STATIC_DRAW`,
+    /// `DYNAMIC_DRAW` or `STREAM_DRAW`.
+    fn to_gl(&self) -> gl::GLuint {
+        match self {
+            &VertexUsageHint::Static => gl::STATIC_DRAW,
+            &VertexUsageHint::Dynamic => gl::DYNAMIC_DRAW,
+            &VertexUsageHint::Stream => gl::STREAM_DRAW,
+        }
+    }
+}
+
+/// Newtype around a GL uniform location value.
+#[derive(Copy, Clone, Debug)]
+pub struct UniformLocation(gl::GLint);
+
+impl UniformLocation {
+    /// Sentinel location (-1). `Device` uses it for `program_mode_id` when no
+    /// program is bound.
+    pub const INVALID: Self = UniformLocation(-1);
+}
+
+/// Hardware/API capability flags. Only compiled with the `debug_renderer`
+/// feature.
+#[cfg(feature = "debug_renderer")]
+pub struct Capabilities {
+    /// `Device::new` currently always sets this to `false`.
+    pub supports_multisampling: bool,
+}
+
+/// Errors reported while building a shader program. Both variants carry the
+/// shader name followed by an error message.
+#[derive(Clone, Debug)]
+pub enum ShaderError {
+    Compilation(String, String), // name, error message
+    Link(String, String),        // name, error message
+}
+
+/// Wraps a GL context and caches the binding state it has set, so that
+/// redundant bind calls can be skipped.
+pub struct Device {
+    gl: Rc<gl::Gl>,
+    // device state
+    // Cached bindings; the `bind_*` methods compare against these before
+    // issuing a GL call.
+    bound_textures: [gl::GLuint; 16],
+    bound_program: gl::GLuint,
+    bound_vao: gl::GLuint,
+    bound_read_fbo: FBOId,
+    bound_draw_fbo: FBOId,
+    program_mode_id: UniformLocation,
+    // Framebuffer bindings captured at the start of each frame by `begin_frame`.
+    default_read_fbo: gl::GLuint,
+    default_draw_fbo: gl::GLuint,
+
+    device_pixel_ratio: f32,
+    upload_method: UploadMethod,
+
+    // HW or API capabilities
+    #[cfg(feature = "debug_renderer")]
+    capabilities: Capabilities,
+
+    // BGRA format constant selected in `new` from the GL type and extensions.
+    bgra_format: gl::GLuint,
+
+    // debug
+    // Set by `begin_frame`; checked by debug assertions in frame-only methods.
+    inside_frame: bool,
+
+    // resources
+    resource_override_path: Option<PathBuf>,
+
+    max_texture_size: u32,
+    renderer_name: String,
+    cached_programs: Option<Rc<ProgramCache>>,
+
+    // Frame counter. This is used to map between CPU
+    // frames and GPU frames.
+    frame_id: FrameId,
+
+    // GL extensions
+    extensions: Vec<String>,
+}
+
+impl Device {
+    /// Creates a `Device` for the given GL context.
+    ///
+    /// Queries `MAX_TEXTURE_SIZE`, the renderer string and the extension list
+    /// from GL, and picks the BGRA format constant from the GL type and the
+    /// `GL_EXT_texture_format_BGRA8888` extension. `_file_changed_handler` is
+    /// accepted but not used.
+    pub fn new(
+        gl: Rc<gl::Gl>,
+        resource_override_path: Option<PathBuf>,
+        upload_method: UploadMethod,
+        _file_changed_handler: Box<FileWatcherHandler>,
+        cached_programs: Option<Rc<ProgramCache>>,
+    ) -> Device {
+        let max_texture_size = {
+            let mut value = [0];
+            unsafe {
+                gl.get_integer_v(gl::MAX_TEXTURE_SIZE, &mut value);
+            }
+            value[0] as u32
+        };
+        let renderer_name = gl.get_string(gl::RENDERER);
+
+        let extension_count = {
+            let mut value = [0];
+            unsafe {
+                gl.get_integer_v(gl::NUM_EXTENSIONS, &mut value);
+            }
+            value[0] as gl::GLuint
+        };
+        let extensions: Vec<String> = (0 .. extension_count)
+            .map(|index| gl.get_string_i(gl::EXTENSIONS, index))
+            .collect();
+
+        let supports_bgra = supports_extension(&extensions, "GL_EXT_texture_format_BGRA8888");
+        let bgra_format = match gl.get_type() {
+            gl::GlType::Gl => GL_FORMAT_BGRA_GL,
+            gl::GlType::Gles if supports_bgra => GL_FORMAT_BGRA_GLES,
+            gl::GlType::Gles => GL_FORMAT_RGBA,
+        };
+
+        Device {
+            gl,
+
+            bound_textures: [0; 16],
+            bound_program: 0,
+            bound_vao: 0,
+            bound_read_fbo: FBOId(0),
+            bound_draw_fbo: FBOId(0),
+            program_mode_id: UniformLocation::INVALID,
+            default_read_fbo: 0,
+            default_draw_fbo: 0,
+
+            // This is initialized to 1 by default, but it is reset
+            // at the beginning of each frame in `Renderer::bind_frame_data`.
+            device_pixel_ratio: 1.0,
+            upload_method,
+
+            #[cfg(feature = "debug_renderer")]
+            capabilities: Capabilities {
+                supports_multisampling: false, //TODO
+            },
+
+            bgra_format,
+
+            inside_frame: false,
+
+            resource_override_path,
+
+            max_texture_size,
+            renderer_name,
+            cached_programs,
+            frame_id: FrameId(0),
+            extensions,
+        }
+    }
+
+    /// Borrows the underlying GL interface.
+    pub fn gl(&self) -> &gl::Gl {
+        &*self.gl
+    }
+
+    /// Borrows the reference-counted handle to the GL interface.
+    pub fn rc_gl(&self) -> &Rc<gl::Gl> {
+        &self.gl
+    }
+
+    /// Stores `ratio` as the device pixel ratio.
+    pub fn set_device_pixel_ratio(&mut self, ratio: f32) {
+        self.device_pixel_ratio = ratio;
+    }
+
+    /// Replaces the program cache used by this device with `cached_programs`.
+    pub fn update_program_cache(&mut self, cached_programs: Rc<ProgramCache>) {
+        self.cached_programs = Some(cached_programs);
+    }
+
+    /// Returns the `MAX_TEXTURE_SIZE` value queried from GL in `new`.
+    pub fn max_texture_size(&self) -> u32 {
+        self.max_texture_size
+    }
+
+    /// Borrows the capability flags (only with the `debug_renderer` feature).
+    #[cfg(feature = "debug_renderer")]
+    pub fn get_capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    /// Clears the cached texture, VAO and framebuffer bindings to 0.
+    ///
+    /// Only the cache is touched: no GL calls are issued, and the cached
+    /// program binding is left as it is.
+    pub fn reset_state(&mut self) {
+        self.bound_textures = [0; 16];
+        self.bound_vao = 0;
+        self.bound_read_fbo = FBOId(0);
+        self.bound_draw_fbo = FBOId(0);
+    }
+
+    #[cfg(debug_assertions)]
+    fn print_shader_errors(source: &str, log: &str) {
+        // hacky way to extract the offending lines
+        if !log.starts_with("0:") {
+            return;
+        }
+        let end_pos = match log[2..].chars().position(|c| !c.is_digit(10)) {
+            Some(pos) => 2 + pos,
+            None => return,
+        };
+        let base_line_number = match log[2 .. end_pos].parse::<usize>() {
+            Ok(number) if number >= 2 => number - 2,
+            _ => return,
+        };
+        for (line, prefix) in source.lines().skip(base_line_number).zip(&["|",">","|"]) {
+            error!("{}\t{}", prefix, line);
+        }
+    }
+
+    /// Compiles `source` as a shader of type `shader_type`.
+    ///
+    /// `name` is used only for logging and for the returned error.
+    ///
+    /// Returns the GL name of the compiled shader, which the caller then
+    /// owns. On failure the info log is logged, the shader object is deleted,
+    /// and `ShaderError::Compilation(name, log)` is returned. A non-empty
+    /// info log on success is logged as a warning.
+    pub fn compile_shader(
+        gl: &gl::Gl,
+        name: &str,
+        shader_type: gl::GLenum,
+        source: &String,
+    ) -> Result<gl::GLuint, ShaderError> {
+        debug!("compile {}", name);
+        let id = gl.create_shader(shader_type);
+        gl.shader_source(id, &[source.as_bytes()]);
+        gl.compile_shader(id);
+        let log = gl.get_shader_info_log(id);
+        let mut status = [0];
+        unsafe {
+            gl.get_shader_iv(id, gl::COMPILE_STATUS, &mut status);
+        }
+        if status[0] == 0 {
+            error!("Failed to compile shader: {}\n{}", name, log);
+            #[cfg(debug_assertions)]
+            Self::print_shader_errors(source, &log);
+            // The caller only gets the error back, never the shader name, so
+            // the object has to be released here or it would leak.
+            gl.delete_shader(id);
+            Err(ShaderError::Compilation(name.to_string(), log))
+        } else {
+            if !log.is_empty() {
+                warn!("Warnings detected on shader: {}\n{}", name, log);
+            }
+            Ok(id)
+        }
+    }
+
+    /// Marks the start of a frame and brings GL state and the cached state
+    /// to a known baseline.
+    ///
+    /// Records the currently bound read/draw framebuffers as the defaults,
+    /// unbinds `TEXTURE_2D` on every tracked texture unit, unbinds the program
+    /// and the vertex array, sets `UNPACK_ALIGNMENT` to 1, unbinds the pixel
+    /// unpack buffer and leaves texture unit 0 active.
+    ///
+    /// Returns the current frame id. Debug builds assert that no frame is
+    /// already in progress.
+    pub fn begin_frame(&mut self) -> FrameId {
+        debug_assert!(!self.inside_frame);
+        self.inside_frame = true;
+
+        // Retrieve the currently set FBO.
+        let mut default_read_fbo = [0];
+        unsafe {
+            self.gl.get_integer_v(gl::READ_FRAMEBUFFER_BINDING, &mut default_read_fbo);
+        }
+        self.default_read_fbo = default_read_fbo[0] as gl::GLuint;
+        let mut default_draw_fbo = [0];
+        unsafe {
+            self.gl.get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING, &mut default_draw_fbo);
+        }
+        self.default_draw_fbo = default_draw_fbo[0] as gl::GLuint;
+
+        // Texture state
+        for i in 0 .. self.bound_textures.len() {
+            self.bound_textures[i] = 0;
+            self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint);
+            self.gl.bind_texture(gl::TEXTURE_2D, 0);
+        }
+
+        // Shader state
+        self.bound_program = 0;
+        self.program_mode_id = UniformLocation::INVALID;
+        self.gl.use_program(0);
+
+        // Vertex state
+        self.bound_vao = 0;
+        self.gl.bind_vertex_array(0);
+
+        // FBO state
+        // Only the cache is updated here; the framebuffers just queried are
+        // the ones currently bound, so no bind call is needed.
+        self.bound_read_fbo = FBOId(self.default_read_fbo);
+        self.bound_draw_fbo = FBOId(self.default_draw_fbo);
+
+        // Pixel op state
+        self.gl.pixel_store_i(gl::UNPACK_ALIGNMENT, 1);
+        self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+
+        // Default is sampler 0, always
+        self.gl.active_texture(gl::TEXTURE0);
+
+        self.frame_id
+    }
+
+    /// Binds texture `id` to `target` on the texture unit given by `slot`,
+    /// unless the cache says that unit already holds `id`. Texture unit 0 is
+    /// active again afterwards.
+    fn bind_texture_impl(&mut self, slot: TextureSlot, id: gl::GLuint, target: gl::GLenum) {
+        debug_assert!(self.inside_frame);
+
+        let unit = slot.0;
+        if self.bound_textures[unit] == id {
+            return;
+        }
+
+        self.bound_textures[unit] = id;
+        self.gl.active_texture(gl::TEXTURE0 + unit as gl::GLuint);
+        self.gl.bind_texture(target, id);
+        self.gl.active_texture(gl::TEXTURE0);
+    }
+
+    /// Binds `texture` to the texture unit that `sampler` converts into.
+    pub fn bind_texture<S>(&mut self, sampler: S, texture: &Texture)
+    where
+        S: Into<TextureSlot>,
+    {
+        self.bind_texture_impl(sampler.into(), texture.id, texture.target);
+    }
+
+    /// Binds `external_texture` to the texture unit that `sampler` converts into.
+    pub fn bind_external_texture<S>(&mut self, sampler: S, external_texture: &ExternalTexture)
+    where
+        S: Into<TextureSlot>,
+    {
+        self.bind_texture_impl(sampler.into(), external_texture.id, external_texture.target);
+    }
+
+    /// Binds `fbo_id` as the read framebuffer unless the cache says it is
+    /// already bound.
+    pub fn bind_read_target_impl(&mut self, fbo_id: FBOId) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_read_fbo == fbo_id {
+            return;
+        }
+
+        self.bound_read_fbo = fbo_id;
+        fbo_id.bind(self.gl(), FBOTarget::Read);
+    }
+
+    /// Binds the framebuffer of the given texture layer as the read target,
+    /// or the default read framebuffer when `texture_and_layer` is `None`.
+    pub fn bind_read_target(&mut self, texture_and_layer: Option<(&Texture, i32)>) {
+        let fbo_id = match texture_and_layer {
+            Some((texture, layer)) => texture.fbo_ids[layer as usize],
+            None => FBOId(self.default_read_fbo),
+        };
+
+        self.bind_read_target_impl(fbo_id)
+    }
+
+    /// Binds `fbo_id` as the draw framebuffer unless the cache says it is
+    /// already bound.
+    fn bind_draw_target_impl(&mut self, fbo_id: FBOId) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_draw_fbo == fbo_id {
+            return;
+        }
+
+        self.bound_draw_fbo = fbo_id;
+        fbo_id.bind(self.gl(), FBOTarget::Draw);
+    }
+
+    /// Binds the framebuffer of the given texture layer as the draw target,
+    /// or the default draw framebuffer when `texture_and_layer` is `None`.
+    ///
+    /// When `dimensions` is provided, the viewport is also set to
+    /// `(0, 0, width, height)`.
+    pub fn bind_draw_target(
+        &mut self,
+        texture_and_layer: Option<(&Texture, i32)>,
+        dimensions: Option<DeviceUintSize>,
+    ) {
+        let fbo_id = match texture_and_layer {
+            Some((texture, layer)) => texture.fbo_ids[layer as usize],
+            None => FBOId(self.default_draw_fbo),
+        };
+
+        self.bind_draw_target_impl(fbo_id);
+
+        if let Some(size) = dimensions {
+            self.gl.viewport(0, 0, size.width as _, size.height as _);
+        }
+    }
+
+    /// Creates a framebuffer object with `texture_id` attached as a
+    /// `TEXTURE_2D` color attachment (level 0) and returns its id.
+    ///
+    /// The new framebuffer is bound as the draw target for the attachment
+    /// call; the draw binding recorded in the cache is restored afterwards.
+    pub fn create_fbo_for_external_texture(&mut self, texture_id: u32) -> FBOId {
+        let fbo = FBOId(self.gl.gen_framebuffers(1)[0]);
+        fbo.bind(self.gl(), FBOTarget::Draw);
+        self.gl.framebuffer_texture_2d(
+            gl::DRAW_FRAMEBUFFER,
+            gl::COLOR_ATTACHMENT0,
+            gl::TEXTURE_2D,
+            texture_id,
+            0,
+        );
+        // Re-bind whatever the cache believes is the current draw target.
+        self.bound_draw_fbo.bind(self.gl(), FBOTarget::Draw);
+        fbo
+    }
+
+    /// Deletes the framebuffer object `fbo` and keeps the cached bindings in
+    /// sync with GL.
+    pub fn delete_fbo(&mut self, fbo: FBOId) {
+        self.gl.delete_framebuffers(&[fbo.0]);
+
+        // Deleting a framebuffer that is currently bound makes GL revert that
+        // binding to framebuffer 0. Mirror that in the cache; otherwise a
+        // later bind of a recycled name could be skipped as "already bound".
+        if self.bound_read_fbo == fbo {
+            self.bound_read_fbo = FBOId(0);
+        }
+        if self.bound_draw_fbo == fbo {
+            self.bound_draw_fbo = FBOId(0);
+        }
+    }
+
+    /// Binds `fbo_id` as the draw framebuffer unless the cache says it is
+    /// already bound. Public entry point with the same behaviour as
+    /// `bind_draw_target_impl`.
+    pub fn bind_external_draw_target(&mut self, fbo_id: FBOId) {
+        self.bind_draw_target_impl(fbo_id);
+    }
+
+    /// Makes `program` the current GL program unless the cache says it
+    /// already is, and records its `u_mode` location as `program_mode_id`.
+    pub fn bind_program(&mut self, program: &Program) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_program == program.id {
+            return;
+        }
+
+        self.gl.use_program(program.id);
+        self.bound_program = program.id;
+        self.program_mode_id = UniformLocation(program.u_mode);
+    }
+
+    /// Generates a new GL texture name and returns a `Texture` for it with
+    /// no storage: zero size, zero layers, `Nearest` filter, no render target
+    /// state, and `last_frame_used` set to the current frame id.
+    pub fn create_texture(
+        &mut self,
+        target: TextureTarget,
+        format: ImageFormat,
+    ) -> Texture {
+        let id = self.gl.gen_textures(1)[0];
+        let gl_target = get_gl_target(target);
+
+        Texture {
+            id,
+            target: gl_target,
+            layer_count: 0,
+            format,
+            width: 0,
+            height: 0,
+            filter: TextureFilter::Nearest,
+            render_target: None,
+            fbo_ids: Vec::new(),
+            depth_rb: None,
+            last_frame_used: self.frame_id,
+        }
+    }
+
+    /// Sets the mag/min filters and clamp-to-edge wrapping on `target`.
+    ///
+    /// `Trilinear` uses `LINEAR` for magnification and `LINEAR_MIPMAP_LINEAR`
+    /// for minification; the other filters use the same constant for both.
+    fn set_texture_parameters(&mut self, target: gl::GLuint, filter: TextureFilter) {
+        let (mag_filter, min_filter) = match filter {
+            TextureFilter::Nearest => (gl::NEAREST, gl::NEAREST),
+            TextureFilter::Linear => (gl::LINEAR, gl::LINEAR),
+            TextureFilter::Trilinear => (gl::LINEAR, gl::LINEAR_MIPMAP_LINEAR),
+        };
+
+        let parameters = [
+            (gl::TEXTURE_MAG_FILTER, mag_filter as gl::GLint),
+            (gl::TEXTURE_MIN_FILTER, min_filter as gl::GLint),
+            (gl::TEXTURE_WRAP_S, gl::CLAMP_TO_EDGE as gl::GLint),
+            (gl::TEXTURE_WRAP_T, gl::CLAMP_TO_EDGE as gl::GLint),
+        ];
+        for &(name, value) in parameters.iter() {
+            self.gl.tex_parameter_i(target, name, value);
+        }
+    }
+
+    /// Resizes a texture with enabled render target views,
+    /// preserves the data by blitting the old texture contents over.
+    ///
+    /// A new GL texture name replaces `texture.id`; the old texture and its
+    /// framebuffers are deleted once their contents have been copied. The
+    /// default read framebuffer is bound again at the end.
+    ///
+    /// Panics if `texture` has no render target info.
+    pub fn resize_renderable_texture(
+        &mut self,
+        texture: &mut Texture,
+        new_size: DeviceUintSize,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        let old_size = texture.get_dimensions();
+        // Take the old FBOs and texture name out of `texture`, so the storage
+        // update below creates fresh framebuffers for the new texture name.
+        let old_fbos = mem::replace(&mut texture.fbo_ids, Vec::new());
+        let old_texture_id = mem::replace(&mut texture.id, self.gl.gen_textures(1)[0]);
+
+        texture.width = new_size.width;
+        texture.height = new_size.height;
+        let rt_info = texture.render_target
+            .clone()
+            .expect("Only renderable textures are expected for resize here");
+
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+        self.set_texture_parameters(texture.target, texture.filter);
+        self.update_target_storage::<u8>(texture, &rt_info, true, None);
+
+        // Copy each old layer into the matching new layer. The old size is
+        // used for both the source and the destination rectangle.
+        let rect = DeviceIntRect::new(DeviceIntPoint::zero(), old_size.to_i32());
+        for (read_fbo, &draw_fbo) in old_fbos.into_iter().zip(&texture.fbo_ids) {
+            self.bind_read_target_impl(read_fbo);
+            self.bind_draw_target_impl(draw_fbo);
+            self.blit_render_target(rect, rect);
+            self.delete_fbo(read_fbo);
+        }
+        self.gl.delete_textures(&[old_texture_id]);
+        self.bind_read_target(None);
+    }
+
+    /// (Re)initializes `texture` with the given size, filter, render target
+    /// info and layer count, optionally uploading `pixels`.
+    ///
+    /// A width or height above `max_texture_size` is logged as an error and
+    /// clamped. The texture is bound to `DEFAULT_TEXTURE`, its sampling
+    /// parameters are set, and its storage is updated through
+    /// `update_target_storage` (render targets) or `update_texture_storage`.
+    pub fn init_texture<T: Texel>(
+        &mut self,
+        texture: &mut Texture,
+        mut width: u32,
+        mut height: u32,
+        filter: TextureFilter,
+        render_target: Option<RenderTargetInfo>,
+        layer_count: i32,
+        pixels: Option<&[T]>,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        if width > self.max_texture_size || height > self.max_texture_size {
+            error!("Attempting to allocate a texture of size {}x{} above the limit, trimming", width, height);
+            width = width.min(self.max_texture_size);
+            height = height.min(self.max_texture_size);
+        }
+
+        // Compared after clamping, so a clamped request that matches the
+        // current size does not count as a resize.
+        let is_resized = texture.width != width || texture.height != height;
+
+        texture.width = width;
+        texture.height = height;
+        texture.filter = filter;
+        texture.layer_count = layer_count;
+        texture.render_target = render_target;
+        texture.last_frame_used = self.frame_id;
+
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+        self.set_texture_parameters(texture.target, filter);
+
+        match render_target {
+            Some(info) => {
+                self.update_target_storage(texture, &info, is_resized, pixels);
+            }
+            None => {
+                self.update_texture_storage(texture, pixels);
+            }
+        }
+    }
+
+    /// Updates the render target storage for the texture, creating FBOs as required.
+    ///
+    /// Color storage is reallocated when the layer count differs from the
+    /// number of existing FBOs, when `is_resized` is set, or when `pixels` are
+    /// supplied. FBOs are added or removed so there is one per layer. The
+    /// depth renderbuffer is created, resized or deleted to match
+    /// `rt_info.has_depth`. If either allocation changed, every FBO gets its
+    /// color and depth attachments set again.
+    ///
+    /// Panics if the layer count is not positive while the texture has a
+    /// non-zero size, or if a texture whose target is not `TEXTURE_2D_ARRAY`
+    /// needs color storage and does not have exactly one layer.
+    fn update_target_storage<T: Texel>(
+        &mut self,
+        texture: &mut Texture,
+        rt_info: &RenderTargetInfo,
+        is_resized: bool,
+        pixels: Option<&[T]>,
+    ) {
+        assert!(texture.layer_count > 0 || texture.width + texture.height == 0);
+
+        // Positive: FBOs are missing. Negative: there are FBOs to remove.
+        let needed_layer_count = texture.layer_count - texture.fbo_ids.len() as i32;
+        let allocate_color = needed_layer_count != 0 || is_resized || pixels.is_some();
+
+        if allocate_color {
+            let desc = self.gl_describe_format(texture.format);
+            match texture.target {
+                gl::TEXTURE_2D_ARRAY => {
+                    self.gl.tex_image_3d(
+                        texture.target,
+                        0,
+                        desc.internal,
+                        texture.width as _,
+                        texture.height as _,
+                        texture.layer_count,
+                        0,
+                        desc.external,
+                        desc.pixel_type,
+                        pixels.map(texels_to_u8_slice),
+                    )
+                }
+                _ => {
+                    assert_eq!(texture.layer_count, 1);
+                    self.gl.tex_image_2d(
+                        texture.target,
+                        0,
+                        desc.internal,
+                        texture.width as _,
+                        texture.height as _,
+                        0,
+                        desc.external,
+                        desc.pixel_type,
+                        pixels.map(texels_to_u8_slice),
+                    )
+                }
+            }
+        }
+
+        if needed_layer_count > 0 {
+            // Create more framebuffers to fill the gap
+            let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
+            texture
+                .fbo_ids
+                .extend(new_fbos.into_iter().map(FBOId));
+        } else if needed_layer_count < 0 {
+            // Remove extra framebuffers
+            for old in texture.fbo_ids.drain(texture.layer_count as usize ..) {
+                self.gl.delete_framebuffers(&[old.0]);
+            }
+        }
+
+        // `allocate_depth` means the depth renderbuffer must be (re)allocated
+        // or, when depth is no longer wanted, deleted.
+        let (mut depth_rb, allocate_depth) = match texture.depth_rb {
+            Some(rbo) => (rbo.0, is_resized || !rt_info.has_depth),
+            None if rt_info.has_depth => {
+                let renderbuffer_ids = self.gl.gen_renderbuffers(1);
+                let depth_rb = renderbuffer_ids[0];
+                texture.depth_rb = Some(RBOId(depth_rb));
+                (depth_rb, true)
+            },
+            None => (0, false),
+        };
+
+        if allocate_depth {
+            if rt_info.has_depth {
+                self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
+                self.gl.renderbuffer_storage(
+                    gl::RENDERBUFFER,
+                    gl::DEPTH_COMPONENT24,
+                    texture.width as _,
+                    texture.height as _,
+                );
+            } else {
+                self.gl.delete_renderbuffers(&[depth_rb]);
+                depth_rb = 0;
+                texture.depth_rb = None;
+            }
+        }
+
+        if allocate_color || allocate_depth {
+            // Remember the current draw target so it can be restored after
+            // the per-layer attachment updates.
+            let original_bound_fbo = self.bound_draw_fbo;
+            for (fbo_index, &fbo_id) in texture.fbo_ids.iter().enumerate() {
+                self.bind_external_draw_target(fbo_id);
+                match texture.target {
+                    gl::TEXTURE_2D_ARRAY => {
+                        self.gl.framebuffer_texture_layer(
+                            gl::DRAW_FRAMEBUFFER,
+                            gl::COLOR_ATTACHMENT0,
+                            texture.id,
+                            0,
+                            fbo_index as _,
+                        )
+                    }
+                    _ => {
+                        assert_eq!(fbo_index, 0);
+                        self.gl.framebuffer_texture_2d(
+                            gl::DRAW_FRAMEBUFFER,
+                            gl::COLOR_ATTACHMENT0,
+                            texture.target,
+                            texture.id,
+                            0,
+                        )
+                    }
+                }
+
+                // `depth_rb` is 0 here when the texture has no depth buffer.
+                self.gl.framebuffer_renderbuffer(
+                    gl::DRAW_FRAMEBUFFER,
+                    gl::DEPTH_ATTACHMENT,
+                    gl::RENDERBUFFER,
+                    depth_rb,
+                );
+            }
+            self.bind_external_draw_target(original_bound_fbo);
+        }
+    }
+
+    /// Allocates level-0 storage for a texture that is not a render target
+    /// and uploads `pixels` when provided.
+    ///
+    /// `TEXTURE_2D_ARRAY` targets get 3D storage with `layer_count` layers;
+    /// `TEXTURE_2D`, `TEXTURE_RECTANGLE` and `TEXTURE_EXTERNAL_OES` get 2D
+    /// storage. Panics on any other target.
+    fn update_texture_storage<T: Texel>(&mut self, texture: &Texture, pixels: Option<&[T]>) {
+        let format_desc = self.gl_describe_format(texture.format);
+        let target = texture.target;
+
+        match target {
+            gl::TEXTURE_2D_ARRAY => {
+                self.gl.tex_image_3d(
+                    target,
+                    0,
+                    format_desc.internal,
+                    texture.width as _,
+                    texture.height as _,
+                    texture.layer_count,
+                    0,
+                    format_desc.external,
+                    format_desc.pixel_type,
+                    pixels.map(texels_to_u8_slice),
+                );
+            }
+            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
+                self.gl.tex_image_2d(
+                    target,
+                    0,
+                    format_desc.internal,
+                    texture.width as _,
+                    texture.height as _,
+                    0,
+                    format_desc.external,
+                    format_desc.pixel_type,
+                    pixels.map(texels_to_u8_slice),
+                );
+            }
+            _ => panic!("BUG: Unexpected texture target!"),
+        }
+    }
+
+    /// Blits the color buffer from `src_rect` of the bound read framebuffer
+    /// to `dest_rect` of the bound draw framebuffer, using linear filtering.
+    pub fn blit_render_target(&mut self, src_rect: DeviceIntRect, dest_rect: DeviceIntRect) {
+        debug_assert!(self.inside_frame);
+
+        // GL takes corner coordinates rather than origin + size.
+        let src_x0 = src_rect.origin.x;
+        let src_y0 = src_rect.origin.y;
+        let src_x1 = src_rect.origin.x + src_rect.size.width;
+        let src_y1 = src_rect.origin.y + src_rect.size.height;
+
+        let dest_x0 = dest_rect.origin.x;
+        let dest_y0 = dest_rect.origin.y;
+        let dest_x1 = dest_rect.origin.x + dest_rect.size.width;
+        let dest_y1 = dest_rect.origin.y + dest_rect.size.height;
+
+        self.gl.blit_framebuffer(
+            src_x0, src_y0, src_x1, src_y1,
+            dest_x0, dest_y0, dest_x1, dest_y1,
+            gl::COLOR_BUFFER_BIT,
+            gl::LINEAR,
+        );
+    }
+
+    /// Re-specifies level 0 of the texture currently bound to `target` with
+    /// zero dimensions and no data, using the formats from `desc`.
+    fn free_texture_storage_impl(&mut self, target: gl::GLenum, desc: FormatDesc) {
+        if target == gl::TEXTURE_2D_ARRAY {
+            self.gl.tex_image_3d(
+                target,
+                0,
+                desc.internal,
+                0,
+                0,
+                0,
+                0,
+                desc.external,
+                desc.pixel_type,
+                None,
+            );
+        } else {
+            self.gl.tex_image_2d(
+                target,
+                0,
+                desc.internal,
+                0,
+                0,
+                0,
+                desc.external,
+                desc.pixel_type,
+                None,
+            );
+        }
+    }
+
+    /// Releases the storage of `texture` while keeping its GL name.
+    ///
+    /// Does nothing when width and height are both zero. Otherwise the
+    /// texture is bound to `DEFAULT_TEXTURE`, its storage is re-specified
+    /// with zero size, its depth renderbuffer and framebuffers are deleted,
+    /// and its size and layer count are reset to 0.
+    pub fn free_texture_storage(&mut self, texture: &mut Texture) {
+        debug_assert!(self.inside_frame);
+
+        if texture.width + texture.height == 0 {
+            return;
+        }
+
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+        let desc = self.gl_describe_format(texture.format);
+
+        self.free_texture_storage_impl(texture.target, desc);
+
+        if let Some(RBOId(depth_rb)) = texture.depth_rb.take() {
+            self.gl.delete_renderbuffers(&[depth_rb]);
+        }
+
+        if !texture.fbo_ids.is_empty() {
+            // Collect the raw names so they can all be deleted in one call.
+            let fbo_ids: Vec<_> = texture
+                .fbo_ids
+                .drain(..)
+                .map(|FBOId(fbo_id)| fbo_id)
+                .collect();
+            self.gl.delete_framebuffers(&fbo_ids[..]);
+        }
+
+        texture.width = 0;
+        texture.height = 0;
+        texture.layer_count = 0;
+    }
+
+    /// Frees the storage of `texture`, deletes its GL name and clears every
+    /// cached texture binding that referred to it.
+    pub fn delete_texture(&mut self, mut texture: Texture) {
+        self.free_texture_storage(&mut texture);
+
+        let deleted_id = texture.id;
+        self.gl.delete_textures(&[deleted_id]);
+
+        self.bound_textures
+            .iter_mut()
+            .filter(|bound| **bound == deleted_id)
+            .for_each(|bound| *bound = 0);
+
+        // Zero the id so the `Drop` check on `Texture` passes.
+        texture.id = 0;
+    }
+
+    /// Frees the storage of `external` and deletes its GL name
+    /// (only with the `replay` feature).
+    #[cfg(feature = "replay")]
+    pub fn delete_external_texture(&mut self, mut external: ExternalTexture) {
+        self.bind_external_texture(DEFAULT_TEXTURE, &external);
+        //Note: the format descriptor here doesn't really matter
+        self.free_texture_storage_impl(external.target, FormatDesc {
+            internal: gl::R8 as _,
+            external: gl::RED,
+            pixel_type: gl::UNSIGNED_BYTE,
+        });
+        self.gl.delete_textures(&[external.id]);
+        external.id = 0;
+    }
+
+    /// Deletes the GL program and zeroes its id so the `Drop` check on
+    /// `Program` passes.
+    pub fn delete_program(&mut self, mut program: Program) {
+        self.gl.delete_program(program.id);
+        program.id = 0;
+    }
+
+    /// Builds, links and binds the shader program for `base_filename`
+    /// specialised with `features`.
+    ///
+    /// If a program-binary cache is configured and holds an entry for the
+    /// generated sources, that binary is loaded. When there is no entry, or
+    /// the driver rejects it, both shaders are compiled and linked from
+    /// source; attribute locations are then assigned in declaration order
+    /// (vertex attributes first, followed by instance attributes).
+    ///
+    /// A program that is not yet present in the cache is stored there when
+    /// the driver returns a non-empty binary for it.
+    ///
+    /// Returns the error produced by `Device::compile_shader`, or
+    /// `ShaderError::Link` when linking fails. The GL program object is
+    /// deleted on every error path.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn create_program(
+        &mut self,
+        base_filename: &str,
+        features: &str,
+        descriptor: &VertexDescriptor,
+    ) -> Result<Program, ShaderError> {
+        debug_assert!(self.inside_frame);
+
+        let gl_version_string = get_shader_version(&*self.gl);
+
+        let (vs_source, fs_source) = build_shader_strings(
+            gl_version_string,
+            features,
+            base_filename,
+            &self.resource_override_path,
+        );
+
+        let sources = ProgramSources::new(self.renderer_name.clone(), vs_source, fs_source);
+
+        // Create program
+        let pid = self.gl.create_program();
+
+        let mut loaded = false;
+
+        // First try to restore the program from a cached binary.
+        if let Some(ref cached_programs) = self.cached_programs {
+            if let Some(binary) = cached_programs.binaries.borrow().get(&sources)
+            {
+                self.gl.program_binary(pid, binary.format, &binary.binary);
+
+                let mut link_status = [0];
+                unsafe {
+                    self.gl.get_program_iv(pid, gl::LINK_STATUS, &mut link_status);
+                }
+                if link_status[0] == 0 {
+                    let error_log = self.gl.get_program_info_log(pid);
+                    error!(
+                      "Failed to load a program object with a program binary: {} renderer {}\n{}",
+                      base_filename,
+                      self.renderer_name,
+                      error_log
+                    );
+                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
+                        program_cache_handler.notify_program_binary_failed(&binary);
+                    }
+                } else {
+                    loaded = true;
+                }
+            }
+        }
+
+        // No usable binary: build the program from source.
+        if !loaded {
+            // Compile the vertex shader
+            let vs_id =
+                match Device::compile_shader(&*self.gl, base_filename, gl::VERTEX_SHADER, &sources.vs_source) {
+                    Ok(vs_id) => vs_id,
+                    Err(err) => {
+                        // Do not leak the program object created above.
+                        self.gl.delete_program(pid);
+                        return Err(err);
+                    }
+                };
+
+            // Compile the fragment shader
+            let fs_id =
+                match Device::compile_shader(&*self.gl, base_filename, gl::FRAGMENT_SHADER, &sources.fs_source) {
+                    Ok(fs_id) => fs_id,
+                    Err(err) => {
+                        self.gl.delete_shader(vs_id);
+                        // Do not leak the program object created above.
+                        self.gl.delete_program(pid);
+                        return Err(err);
+                    }
+                };
+
+            // Attach shaders
+            self.gl.attach_shader(pid, vs_id);
+            self.gl.attach_shader(pid, fs_id);
+
+            // Bind vertex attributes
+            for (i, attr) in descriptor
+                .vertex_attributes
+                .iter()
+                .chain(descriptor.instance_attributes.iter())
+                .enumerate()
+            {
+                self.gl
+                    .bind_attrib_location(pid, i as gl::GLuint, attr.name);
+            }
+
+            // Ask for a retrievable binary only when there is a cache to put it in.
+            if self.cached_programs.is_some() {
+                self.gl.program_parameter_i(pid, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
+            }
+
+            // Link!
+            self.gl.link_program(pid);
+
+            // GL recommends detaching and deleting shaders once the link
+            // is complete (whether successful or not). This allows the driver
+            // to free any memory associated with the parsing and compilation.
+            self.gl.detach_shader(pid, vs_id);
+            self.gl.detach_shader(pid, fs_id);
+            self.gl.delete_shader(vs_id);
+            self.gl.delete_shader(fs_id);
+
+            let mut link_status = [0];
+            unsafe {
+                self.gl.get_program_iv(pid, gl::LINK_STATUS, &mut link_status);
+            }
+            if link_status[0] == 0 {
+                let error_log = self.gl.get_program_info_log(pid);
+                error!(
+                    "Failed to link shader program: {}\n{}",
+                    base_filename,
+                    error_log
+                );
+                self.gl.delete_program(pid);
+                return Err(ShaderError::Link(base_filename.to_string(), error_log));
+            }
+        }
+
+        // Store the binary of a program the cache does not know yet.
+        if let Some(ref cached_programs) = self.cached_programs {
+            if !cached_programs.binaries.borrow().contains_key(&sources) {
+                let (buffer, format) = self.gl.get_program_binary(pid);
+                if !buffer.is_empty() {
+                    let program_binary = Arc::new(ProgramBinary::new(buffer, format, &sources));
+                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
+                        program_cache_handler.notify_binary_added(&program_binary);
+                    }
+                    cached_programs.binaries.borrow_mut().insert(sources, program_binary);
+                }
+            }
+        }
+
+        let u_transform = self.gl.get_uniform_location(pid, "uTransform");
+        let u_device_pixel_ratio = self.gl.get_uniform_location(pid, "uDevicePixelRatio");
+        let u_mode = self.gl.get_uniform_location(pid, "uMode");
+
+        let program = Program {
+            id: pid,
+            u_transform,
+            u_device_pixel_ratio,
+            u_mode,
+        };
+
+        self.bind_program(&program);
+
+        Ok(program)
+    }
+
+    /// Assigns texture slots to the sampler uniforms of `program`.
+    ///
+    /// Each `(uniform name, slot)` pair is looked up in the program; names
+    /// for which GL reports location -1 are skipped. For every uniform that
+    /// is found the program is bound and the uniform is set to the slot index.
+    pub fn bind_shader_samplers<S>(&mut self, program: &Program, bindings: &[(&'static str, S)])
+    where
+        S: Into<TextureSlot> + Copy,
+    {
+        for &(sampler_name, slot) in bindings {
+            let location = self.gl.get_uniform_location(program.id, sampler_name);
+            if location == -1 {
+                continue;
+            }
+
+            self.bind_program(program);
+            let texture_slot: TextureSlot = slot.into();
+            self.gl.uniform_1i(location, texture_slot.0 as gl::GLint);
+        }
+    }
+
+    /// Queries the location of the uniform `name` in `program` and wraps the
+    /// raw GL value in a `UniformLocation`.
+    #[cfg(feature = "debug_renderer")]
+    pub fn get_uniform_location(&self, program: &Program, name: &str) -> UniformLocation {
+        let raw_location = self.gl.get_uniform_location(program.id, name);
+        UniformLocation(raw_location)
+    }
+
+    /// Uploads the per-draw uniforms stored in `program`: the transform
+    /// matrix (as a row-major array, without GL transposition) and the
+    /// device pixel ratio of this device.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn set_uniforms(
+        &self,
+        program: &Program,
+        transform: &Transform3D<f32>,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        let matrix = transform.to_row_major_array();
+        self.gl.uniform_matrix_4fv(program.u_transform, false, &matrix);
+
+        let pixel_ratio = self.device_pixel_ratio;
+        self.gl.uniform_1f(program.u_device_pixel_ratio, pixel_ratio);
+    }
+
+    /// Writes `mode` to the uniform location held in `self.program_mode_id`.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn switch_mode(&self, mode: i32) {
+        debug_assert!(self.inside_frame);
+        let mode_location = self.program_mode_id.0;
+        self.gl.uniform_1i(mode_location, mode);
+    }
+
+    /// Generates one GL buffer name and wraps it in a `PBO`.
+    pub fn create_pbo(&mut self) -> PBO {
+        let buffer_names = self.gl.gen_buffers(1);
+        PBO { id: buffer_names[0] }
+    }
+
+    /// Deletes the GL buffer behind `pbo` and resets the handle's id to 0.
+    pub fn delete_pbo(&mut self, mut pbo: PBO) {
+        let buffer_name = pbo.id;
+        self.gl.delete_buffers(&[buffer_name]);
+        pbo.id = 0;
+    }
+
+    /// Starts an upload session for `texture` and returns the uploader.
+    ///
+    /// The texture is bound to the default slot. With
+    /// `UploadMethod::PixelBuffer`, `pbo` is bound as the pixel-unpack
+    /// buffer and given storage for `upload_count` elements of `T`
+    /// (the allocation is skipped when that size is zero). With
+    /// `UploadMethod::Immediate` no staging buffer is used.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn upload_texture<'a, T>(
+        &'a mut self,
+        texture: &'a Texture,
+        pbo: &PBO,
+        upload_count: usize,
+    ) -> TextureUploader<'a, T> {
+        debug_assert!(self.inside_frame);
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+
+        let buffer = if let UploadMethod::PixelBuffer(hint) = self.upload_method {
+            let byte_size = upload_count * mem::size_of::<T>();
+            self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
+            if byte_size != 0 {
+                // Reserve storage only; the data arrives through the uploader.
+                self.gl.buffer_data_untyped(
+                    gl::PIXEL_UNPACK_BUFFER,
+                    byte_size as _,
+                    ptr::null(),
+                    hint.to_gl(),
+                );
+            }
+            Some(PixelBuffer::new(hint.to_gl(), byte_size))
+        } else {
+            None
+        };
+
+        let target = UploadTarget {
+            gl: &*self.gl,
+            bgra_format: self.bgra_format,
+            texture,
+        };
+
+        TextureUploader {
+            target,
+            buffer,
+            marker: PhantomData,
+        }
+    }
+
+    /// Reads a block of pixels starting at (0, 0) with the size and format
+    /// given by `img_desc` and returns the bytes.
+    #[cfg(any(feature = "debug_renderer", feature = "capture"))]
+    pub fn read_pixels(&mut self, img_desc: &ImageDescriptor) -> Vec<u8> {
+        let FormatDesc { external, pixel_type, .. } = self.gl_describe_format(img_desc.format);
+        let width = img_desc.size.width as i32;
+        let height = img_desc.size.height as i32;
+
+        self.gl.read_pixels(0, 0, width, height, external, pixel_type)
+    }
+
+    /// Reads the pixels inside `rect` into `output`.
+    ///
+    /// `format` selects the layout of the returned data: either one of the
+    /// standard image formats or plain 8-bit RGBA.
+    ///
+    /// # Panics
+    ///
+    /// Panics when `output.len()` differs from
+    /// `bytes_per_pixel * rect.size.width * rect.size.height`.
+    pub fn read_pixels_into(
+        &mut self,
+        rect: DeviceUintRect,
+        format: ReadPixelsFormat,
+        output: &mut [u8],
+    ) {
+        let (bytes_per_pixel, desc) = match format {
+            ReadPixelsFormat::Standard(imf) => {
+                (imf.bytes_per_pixel(), self.gl_describe_format(imf))
+            }
+            ReadPixelsFormat::Rgba8 => {
+                (4, FormatDesc {
+                    external: gl::RGBA,
+                    internal: gl::RGBA8 as _,
+                    pixel_type: gl::UNSIGNED_BYTE,
+                })
+            }
+        };
+        // Widen before multiplying so that large rectangles cannot wrap the
+        // narrower integer type and slip past the length check below.
+        let size_in_bytes =
+            bytes_per_pixel as usize * rect.size.width as usize * rect.size.height as usize;
+        assert_eq!(output.len(), size_in_bytes);
+
+        self.gl.flush();
+        self.gl.read_pixels_into_buffer(
+            rect.origin.x as _,
+            rect.origin.y as _,
+            rect.size.width as _,
+            rect.size.height as _,
+            desc.external,
+            desc.pixel_type,
+            output,
+        );
+    }
+
+    /// Copies mip level 0 of `texture` into `output`, using the external
+    /// format and pixel type that correspond to `format`.
+    ///
+    /// The texture is bound to the default slot as a side effect.
+    #[cfg(feature = "debug_renderer")]
+    pub fn get_tex_image_into(
+        &mut self,
+        texture: &Texture,
+        format: ImageFormat,
+        output: &mut [u8],
+    ) {
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+
+        let FormatDesc { external, pixel_type, .. } = self.gl_describe_format(format);
+        let mip_level = 0;
+        self.gl.get_tex_image_into_buffer(texture.target, mip_level, external, pixel_type, output);
+    }
+
+    /// Attaches mip level 0 of the given texture to colour attachment 0 of
+    /// the current read framebuffer binding.
+    ///
+    /// Array textures attach the layer `layer_id`; every other target is
+    /// attached as a 2D texture and requires `layer_id == 0` (asserted).
+    #[cfg(any(feature = "debug_renderer", feature="capture"))]
+    fn attach_read_texture_raw(
+        &mut self, texture_id: gl::GLuint, target: gl::GLuint, layer_id: i32
+    ) {
+        if target == gl::TEXTURE_2D_ARRAY {
+            self.gl.framebuffer_texture_layer(
+                gl::READ_FRAMEBUFFER,
+                gl::COLOR_ATTACHMENT0,
+                texture_id,
+                0,
+                layer_id,
+            );
+        } else {
+            assert_eq!(layer_id, 0);
+            self.gl.framebuffer_texture_2d(
+                gl::READ_FRAMEBUFFER,
+                gl::COLOR_ATTACHMENT0,
+                target,
+                texture_id,
+                0,
+            );
+        }
+    }
+
+    /// Attaches a texture identified by a raw GL id to the read framebuffer,
+    /// translating `target` to its GL enum first.
+    #[cfg(any(feature = "debug_renderer", feature="capture"))]
+    pub fn attach_read_texture_external(
+        &mut self, texture_id: gl::GLuint, target: TextureTarget, layer_id: i32
+    ) {
+        let gl_target = get_gl_target(target);
+        self.attach_read_texture_raw(texture_id, gl_target, layer_id);
+    }
+
+    /// Attaches layer `layer_id` of `texture` to the read framebuffer.
+    #[cfg(any(feature = "debug_renderer", feature="capture"))]
+    pub fn attach_read_texture(&mut self, texture: &Texture, layer_id: i32) {
+        let (id, target) = (texture.id, texture.target);
+        self.attach_read_texture_raw(id, target, layer_id);
+    }
+
+    /// Binds the vertex array `id`, skipping the GL call when the cached
+    /// `bound_vao` already equals it.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    fn bind_vao_impl(&mut self, id: gl::GLuint) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_vao == id {
+            return;
+        }
+
+        self.bound_vao = id;
+        self.gl.bind_vertex_array(id);
+    }
+
+    /// Makes `vao` the bound vertex array.
+    pub fn bind_vao(&mut self, vao: &VAO) {
+        let vao_id = vao.id;
+        self.bind_vao_impl(vao_id);
+    }
+
+    /// Makes the custom vertex array `vao` the bound vertex array.
+    pub fn bind_custom_vao(&mut self, vao: &CustomVAO) {
+        let vao_id = vao.id;
+        self.bind_vao_impl(vao_id);
+    }
+
+    /// Creates a vertex array object that references the given vertex,
+    /// instance and index buffers laid out according to `descriptor`.
+    ///
+    /// `owns_vertices_and_indices` is stored in the returned `VAO`;
+    /// `delete_vao` uses it to decide whether the main vertex buffer and
+    /// the index buffer are deleted together with the VAO.
+    ///
+    /// On return no vertex array is bound (VAO 0).
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    fn create_vao_with_vbos(
+        &mut self,
+        descriptor: &VertexDescriptor,
+        main_vbo_id: VBOId,
+        instance_vbo_id: VBOId,
+        ibo_id: IBOId,
+        owns_vertices_and_indices: bool,
+    ) -> VAO {
+        debug_assert!(self.inside_frame);
+
+        let instance_stride = descriptor.instance_stride() as usize;
+        let vao_id = self.gl.gen_vertex_arrays(1)[0];
+
+        self.gl.bind_vertex_array(vao_id);
+
+        descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id);
+        ibo_id.bind(self.gl()); // force it to be a part of VAO
+
+        self.gl.bind_vertex_array(0);
+        // The binding was changed behind the back of `bind_vao_impl`; keep
+        // its cache in sync so the next `bind_vao` call is not skipped.
+        self.bound_vao = 0;
+
+        VAO {
+            id: vao_id,
+            ibo_id,
+            main_vbo_id,
+            instance_vbo_id,
+            instance_stride,
+            owns_vertices_and_indices,
+        }
+    }
+
+    /// Creates a vertex array object from several attribute streams.
+    ///
+    /// The attributes of each stream are bound to that stream's buffer.
+    /// Attribute indices are assigned consecutively across streams, in the
+    /// order the streams are given.
+    ///
+    /// On return no vertex array is bound (VAO 0).
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn create_custom_vao(
+        &mut self,
+        streams: &[Stream],
+    ) -> CustomVAO {
+        debug_assert!(self.inside_frame);
+
+        let vao_id = self.gl.gen_vertex_arrays(1)[0];
+        self.gl.bind_vertex_array(vao_id);
+
+        let mut attrib_index = 0;
+        for stream in streams {
+            VertexDescriptor::bind_attributes(
+                stream.attributes,
+                attrib_index,
+                0,
+                self.gl(),
+                stream.vbo,
+            );
+            attrib_index += stream.attributes.len();
+        }
+
+        self.gl.bind_vertex_array(0);
+        // The binding was changed behind the back of `bind_vao_impl`; keep
+        // its cache in sync so the next bind call is not skipped.
+        self.bound_vao = 0;
+
+        CustomVAO {
+            id: vao_id,
+        }
+    }
+
+    /// Deletes the GL vertex array behind `vao` and resets the handle's id
+    /// to 0.
+    pub fn delete_custom_vao(&mut self, mut vao: CustomVAO) {
+        let array_name = vao.id;
+        self.gl.delete_vertex_arrays(&[array_name]);
+        vao.id = 0;
+    }
+
+    /// Generates one GL buffer name and returns it as an `ARRAY_BUFFER`
+    /// handle with no storage allocated yet (`allocated_count == 0`).
+    pub fn create_vbo<T>(&mut self) -> VBO<T> {
+        let id = self.gl.gen_buffers(1)[0];
+
+        VBO {
+            id,
+            target: gl::ARRAY_BUFFER,
+            allocated_count: 0,
+            marker: PhantomData,
+        }
+    }
+
+    /// Deletes the GL buffer behind `vbo` and resets the handle's id to 0.
+    pub fn delete_vbo<T>(&mut self, mut vbo: VBO<T>) {
+        let buffer_name = vbo.id;
+        self.gl.delete_buffers(&[buffer_name]);
+        vbo.id = 0;
+    }
+
+    /// Creates a vertex array object together with three new buffers: the
+    /// index buffer, the main vertex buffer and the instance buffer (taken
+    /// in that order from a single `gen_buffers` call). The returned VAO
+    /// owns its vertex and index buffers.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn create_vao(&mut self, descriptor: &VertexDescriptor) -> VAO {
+        debug_assert!(self.inside_frame);
+
+        let names = self.gl.gen_buffers(3);
+        let (ibo_id, main_vbo_id, instance_vbo_id) =
+            (IBOId(names[0]), VBOId(names[1]), VBOId(names[2]));
+
+        self.create_vao_with_vbos(descriptor, main_vbo_id, instance_vbo_id, ibo_id, true)
+    }
+
+    /// Deletes `vao` and the buffers it owns.
+    ///
+    /// The instance buffer is always deleted. The index buffer and the main
+    /// vertex buffer are deleted only when `owns_vertices_and_indices` is set.
+    pub fn delete_vao(&mut self, mut vao: VAO) {
+        self.gl.delete_vertex_arrays(&[vao.id]);
+        vao.id = 0;
+
+        if vao.owns_vertices_and_indices {
+            for &buffer_name in &[vao.ibo_id.0, vao.main_vbo_id.0] {
+                self.gl.delete_buffers(&[buffer_name]);
+            }
+        }
+
+        let instance_buffer = vao.instance_vbo_id.0;
+        self.gl.delete_buffers(&[instance_buffer]);
+    }
+
+    /// (Re)allocates storage for `count` elements of `V` in `vbo` without
+    /// uploading any data, and records `count` as the buffer's capacity.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn allocate_vbo<V>(
+        &mut self,
+        vbo: &mut VBO<V>,
+        count: usize,
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert!(self.inside_frame);
+        vbo.allocated_count = count;
+
+        let (target, id) = (vbo.target, vbo.id);
+        self.gl.bind_buffer(target, id);
+
+        let byte_size = count * mem::size_of::<V>();
+        // A null data pointer reserves the storage only.
+        self.gl.buffer_data_untyped(target, byte_size as _, ptr::null(), usage_hint.to_gl());
+    }
+
+    /// Copies `data` into `vbo`, starting at element index `offset`.
+    ///
+    /// # Panics
+    ///
+    /// Panics when `offset + data.len()` exceeds the element count the
+    /// buffer was allocated with.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn fill_vbo<V>(
+        &mut self,
+        vbo: &VBO<V>,
+        data: &[V],
+        offset: usize,
+    ) {
+        debug_assert!(self.inside_frame);
+        assert!(offset + data.len() <= vbo.allocated_count);
+
+        self.gl.bind_buffer(vbo.target, vbo.id);
+
+        let element_size = mem::size_of::<V>();
+        let byte_offset = offset * element_size;
+        let byte_len = data.len() * element_size;
+        self.gl.buffer_sub_data_untyped(
+            vbo.target,
+            byte_offset as _,
+            byte_len as _,
+            data.as_ptr() as _,
+        );
+    }
+
+    /// Binds `vbo` and replaces its `ARRAY_BUFFER` contents with `vertices`.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    fn update_vbo_data<V>(
+        &mut self,
+        vbo: VBOId,
+        vertices: &[V],
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        vbo.bind(self.gl());
+
+        let usage = usage_hint.to_gl();
+        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, vertices, usage);
+    }
+
+    /// Creates a VAO that shares the main vertex buffer and the index
+    /// buffer of `base_vao` but gets a newly generated instance buffer.
+    /// The result does not own the shared buffers.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn create_vao_with_new_instances(
+        &mut self,
+        descriptor: &VertexDescriptor,
+        base_vao: &VAO,
+    ) -> VAO {
+        debug_assert!(self.inside_frame);
+
+        let instance_vbo_id = VBOId(self.gl.gen_buffers(1)[0]);
+        let shared_vertices = base_vao.main_vbo_id;
+        let shared_indices = base_vao.ibo_id;
+
+        self.create_vao_with_vbos(descriptor, shared_vertices, instance_vbo_id, shared_indices, false)
+    }
+
+    /// Replaces the contents of the main vertex buffer of `vao`.
+    ///
+    /// `vao` must be the currently bound vertex array (debug-asserted).
+    pub fn update_vao_main_vertices<V>(
+        &mut self,
+        vao: &VAO,
+        vertices: &[V],
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert_eq!(self.bound_vao, vao.id);
+
+        let vertex_buffer = vao.main_vbo_id;
+        self.update_vbo_data(vertex_buffer, vertices, usage_hint);
+    }
+
+    /// Replaces the contents of the instance buffer of `vao`.
+    ///
+    /// Debug builds assert that `vao` is the currently bound vertex array
+    /// and that the size of `V` equals the instance stride of the VAO.
+    pub fn update_vao_instances<V>(
+        &mut self,
+        vao: &VAO,
+        instances: &[V],
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert_eq!(self.bound_vao, vao.id);
+        debug_assert_eq!(vao.instance_stride as usize, mem::size_of::<V>());
+
+        let instance_buffer = vao.instance_vbo_id;
+        self.update_vbo_data(instance_buffer, instances, usage_hint);
+    }
+
+    /// Binds the index buffer of `vao` and replaces its contents with
+    /// `indices`.
+    ///
+    /// Debug builds assert that a frame is in progress and that `vao` is the
+    /// currently bound vertex array.
+    pub fn update_vao_indices<I>(&mut self, vao: &VAO, indices: &[I], usage_hint: VertexUsageHint) {
+        debug_assert!(self.inside_frame);
+        debug_assert_eq!(self.bound_vao, vao.id);
+
+        vao.ibo_id.bind(self.gl());
+
+        let usage = usage_hint.to_gl();
+        gl::buffer_data(self.gl(), gl::ELEMENT_ARRAY_BUFFER, indices, usage);
+    }
+
+    /// Draws `index_count` indices as triangles from a 16-bit index buffer.
+    ///
+    /// `first_vertex` is multiplied by 2 (the size of a `u16`) to form the
+    /// byte offset handed to `draw_elements`.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn draw_triangles_u16(&mut self, first_vertex: i32, index_count: i32) {
+        debug_assert!(self.inside_frame);
+
+        let byte_offset = first_vertex as u32 * 2;
+        self.gl.draw_elements(gl::TRIANGLES, index_count, gl::UNSIGNED_SHORT, byte_offset);
+    }
+
+    /// Draws `index_count` indices as triangles from a 32-bit index buffer.
+    ///
+    /// `first_vertex` is multiplied by 4 (the size of a `u32`) to form the
+    /// byte offset handed to `draw_elements`.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    #[cfg(feature = "debug_renderer")]
+    pub fn draw_triangles_u32(&mut self, first_vertex: i32, index_count: i32) {
+        debug_assert!(self.inside_frame);
+
+        let byte_offset = first_vertex as u32 * 4;
+        self.gl.draw_elements(gl::TRIANGLES, index_count, gl::UNSIGNED_INT, byte_offset);
+    }
+
+    /// Draws `vertex_count` vertices as points, starting at `first_vertex`,
+    /// without an index buffer.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn draw_nonindexed_points(&mut self, first_vertex: i32, vertex_count: i32) {
+        debug_assert!(self.inside_frame);
+        let primitive = gl::POINTS;
+        self.gl.draw_arrays(primitive, first_vertex, vertex_count);
+    }
+
+    /// Draws `vertex_count` vertices as lines, starting at `first_vertex`,
+    /// without an index buffer.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    #[cfg(feature = "debug_renderer")]
+    pub fn draw_nonindexed_lines(&mut self, first_vertex: i32, vertex_count: i32) {
+        debug_assert!(self.inside_frame);
+        let primitive = gl::LINES;
+        self.gl.draw_arrays(primitive, first_vertex, vertex_count);
+    }
+
+    /// Draws `instance_count` instances of `index_count` 16-bit indices as
+    /// triangles, reading indices from offset 0 of the bound index buffer.
+    ///
+    /// Must be called inside a frame (debug-asserted).
+    pub fn draw_indexed_triangles_instanced_u16(&mut self, index_count: i32, instance_count: i32) {
+        debug_assert!(self.inside_frame);
+
+        let index_offset = 0;
+        self.gl.draw_elements_instanced(
+            gl::TRIANGLES,
+            index_count,
+            gl::UNSIGNED_SHORT,
+            index_offset,
+            instance_count,
+        );
+    }
+
+    /// Finishes the current frame.
+    ///
+    /// Resets the draw and read targets, marks the device as outside a
+    /// frame, unbinds the program and the `TEXTURE_2D` binding of every
+    /// texture unit tracked in `bound_textures`, makes unit 0 active again
+    /// and advances the frame id.
+    pub fn end_frame(&mut self) {
+        self.bind_draw_target(None, None);
+        self.bind_read_target(None);
+
+        debug_assert!(self.inside_frame);
+        self.inside_frame = false;
+
+        self.gl.bind_texture(gl::TEXTURE_2D, 0);
+        self.gl.use_program(0);
+
+        let unit_count = self.bound_textures.len();
+        for unit in 0 .. unit_count {
+            let unit_enum = gl::TEXTURE0 + unit as gl::GLuint;
+            self.gl.active_texture(unit_enum);
+            self.gl.bind_texture(gl::TEXTURE_2D, 0);
+        }
+
+        self.gl.active_texture(gl::TEXTURE0);
+
+        self.frame_id.0 += 1;
+    }
+
+    /// Clears the colour and/or depth buffer of the current draw target.
+    ///
+    /// * `color` - clear colour; the colour buffer is untouched when `None`.
+    /// * `depth` - clear depth; the depth buffer is untouched when `None`.
+    ///   Builds with debug assertions check that depth writes are enabled.
+    /// * `rect` - when given, the clear is limited to this rectangle using
+    ///   the scissor test, which is left disabled afterwards.
+    ///
+    /// Nothing is cleared when both `color` and `depth` are `None`.
+    pub fn clear_target(
+        &self,
+        color: Option<[f32; 4]>,
+        depth: Option<f32>,
+        rect: Option<DeviceIntRect>,
+    ) {
+        let mut clear_bits = 0;
+
+        if let Some([red, green, blue, alpha]) = color.map(|c| [c[0], c[1], c[2], c[3]]).map(|c| c) {
+            self.gl.clear_color(red, green, blue, alpha);
+            clear_bits |= gl::COLOR_BUFFER_BIT;
+        }
+
+        if let Some(depth) = depth {
+            if cfg!(debug_assertions) {
+                // Clearing depth has no effect while depth writes are masked.
+                let mut mask = [0];
+                unsafe {
+                    self.gl.get_boolean_v(gl::DEPTH_WRITEMASK, &mut mask);
+                }
+                assert_ne!(mask[0], 0);
+            }
+            self.gl.clear_depth(depth as f64);
+            clear_bits |= gl::DEPTH_BUFFER_BIT;
+        }
+
+        if clear_bits == 0 {
+            return;
+        }
+
+        if let Some(rect) = rect {
+            self.gl.enable(gl::SCISSOR_TEST);
+            self.gl.scissor(
+                rect.origin.x,
+                rect.origin.y,
+                rect.size.width,
+                rect.size.height,
+            );
+            self.gl.clear(clear_bits);
+            self.gl.disable(gl::SCISSOR_TEST);
+        } else {
+            self.gl.clear(clear_bits);
+        }
+    }
+
+    /// Enables the GL depth test.
+    pub fn enable_depth(&self) {
+        let capability = gl::DEPTH_TEST;
+        self.gl.enable(capability);
+    }
+
+    /// Disables the GL depth test.
+    pub fn disable_depth(&self) {
+        let capability = gl::DEPTH_TEST;
+        self.gl.disable(capability);
+    }
+
+    /// Sets the depth comparison function; `depth_func` is passed to GL as
+    /// its integer value.
+    pub fn set_depth_func(&self, depth_func: DepthFunction) {
+        let gl_func = depth_func as gl::GLuint;
+        self.gl.depth_func(gl_func);
+    }
+
+    /// Enables writes to the depth buffer.
+    pub fn enable_depth_write(&self) {
+        let writable = true;
+        self.gl.depth_mask(writable);
+    }
+
+    /// Disables writes to the depth buffer.
+    pub fn disable_depth_write(&self) {
+        let writable = false;
+        self.gl.depth_mask(writable);
+    }
+
+    /// Disables the GL stencil test.
+    pub fn disable_stencil(&self) {
+        let capability = gl::STENCIL_TEST;
+        self.gl.disable(capability);
+    }
+
+    /// Sets the scissor box to `rect`. The scissor test itself is not
+    /// enabled or disabled by this call.
+    pub fn set_scissor_rect(&self, rect: DeviceIntRect) {
+        let (x, y) = (rect.origin.x, rect.origin.y);
+        let (width, height) = (rect.size.width, rect.size.height);
+        self.gl.scissor(x, y, width, height);
+    }
+
+    /// Enables the GL scissor test.
+    pub fn enable_scissor(&self) {
+        let capability = gl::SCISSOR_TEST;
+        self.gl.enable(capability);
+    }
+
+    /// Disables the GL scissor test.
+    pub fn disable_scissor(&self) {
+        let capability = gl::SCISSOR_TEST;
+        self.gl.disable(capability);
+    }
+
+    /// Turns GL blending on (`enable == true`) or off.
+    pub fn set_blend(&self, enable: bool) {
+        match enable {
+            true => self.gl.enable(gl::BLEND),
+            false => self.gl.disable(gl::BLEND),
+        }
+    }
+
+    /// Additive equation with colour factors `SRC_ALPHA` /
+    /// `ONE_MINUS_SRC_ALPHA` and alpha factors `ONE` / `ONE`.
+    pub fn set_blend_mode_alpha(&self) {
+        let (src_rgb, dst_rgb) = (gl::SRC_ALPHA, gl::ONE_MINUS_SRC_ALPHA);
+        let (src_alpha, dst_alpha) = (gl::ONE, gl::ONE);
+        self.gl.blend_func_separate(src_rgb, dst_rgb, src_alpha, dst_alpha);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+
+    /// Additive equation with factors `ONE` / `ONE_MINUS_SRC_ALPHA`.
+    pub fn set_blend_mode_premultiplied_alpha(&self) {
+        let (src_factor, dst_factor) = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.gl.blend_func(src_factor, dst_factor);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+
+    /// Additive equation with factors `ZERO` / `ONE_MINUS_SRC_ALPHA`.
+    pub fn set_blend_mode_premultiplied_dest_out(&self) {
+        let (src_factor, dst_factor) = (gl::ZERO, gl::ONE_MINUS_SRC_ALPHA);
+        self.gl.blend_func(src_factor, dst_factor);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+
+    /// Additive equation with colour factors `ZERO` / `SRC_COLOR` and alpha
+    /// factors `ZERO` / `SRC_ALPHA`.
+    pub fn set_blend_mode_multiply(&self) {
+        let (src_rgb, dst_rgb) = (gl::ZERO, gl::SRC_COLOR);
+        let (src_alpha, dst_alpha) = (gl::ZERO, gl::SRC_ALPHA);
+        self.gl.blend_func_separate(src_rgb, dst_rgb, src_alpha, dst_alpha);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// All factors `ONE`; the colour equation is `MAX`, the alpha equation
+    /// is additive.
+    pub fn set_blend_mode_max(&self) {
+        let unit = gl::ONE;
+        self.gl.blend_func_separate(unit, unit, unit, unit);
+        self.gl.blend_equation_separate(gl::MAX, gl::FUNC_ADD);
+    }
+    /// All factors `ONE`; the colour equation is `MIN`, the alpha equation
+    /// is additive.
+    #[cfg(feature = "debug_renderer")]
+    pub fn set_blend_mode_min(&self) {
+        let unit = gl::ONE;
+        self.gl.blend_func_separate(unit, unit, unit, unit);
+        self.gl.blend_equation_separate(gl::MIN, gl::FUNC_ADD);
+    }
+    /// Additive equation with factors `ZERO` / `ONE_MINUS_SRC_COLOR`.
+    pub fn set_blend_mode_subpixel_pass0(&self) {
+        let (src_factor, dst_factor) = (gl::ZERO, gl::ONE_MINUS_SRC_COLOR);
+        self.gl.blend_func(src_factor, dst_factor);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// Additive equation with factors `ONE` / `ONE`.
+    pub fn set_blend_mode_subpixel_pass1(&self) {
+        let (src_factor, dst_factor) = (gl::ONE, gl::ONE);
+        self.gl.blend_func(src_factor, dst_factor);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// Additive equation with colour factors `ZERO` / `ONE_MINUS_SRC_COLOR`
+    /// and alpha factors `ZERO` / `ONE`.
+    pub fn set_blend_mode_subpixel_with_bg_color_pass0(&self) {
+        let (src_rgb, dst_rgb) = (gl::ZERO, gl::ONE_MINUS_SRC_COLOR);
+        let (src_alpha, dst_alpha) = (gl::ZERO, gl::ONE);
+        self.gl.blend_func_separate(src_rgb, dst_rgb, src_alpha, dst_alpha);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// Additive equation with colour factors `ONE_MINUS_DST_ALPHA` / `ONE`
+    /// and alpha factors `ZERO` / `ONE`.
+    pub fn set_blend_mode_subpixel_with_bg_color_pass1(&self) {
+        let (src_rgb, dst_rgb) = (gl::ONE_MINUS_DST_ALPHA, gl::ONE);
+        let (src_alpha, dst_alpha) = (gl::ZERO, gl::ONE);
+        self.gl.blend_func_separate(src_rgb, dst_rgb, src_alpha, dst_alpha);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// Additive equation with colour factors `ONE` / `ONE` and alpha factors
+    /// `ONE` / `ONE_MINUS_SRC_ALPHA`.
+    pub fn set_blend_mode_subpixel_with_bg_color_pass2(&self) {
+        let (src_rgb, dst_rgb) = (gl::ONE, gl::ONE);
+        let (src_alpha, dst_alpha) = (gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
+        self.gl.blend_func_separate(src_rgb, dst_rgb, src_alpha, dst_alpha);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// Sets the GL blend colour to the RGB of `color` with alpha 1.0 and
+    /// selects the additive equation with factors `CONSTANT_COLOR` /
+    /// `ONE_MINUS_SRC_COLOR`.
+    ///
+    /// `color` is expected to be unpremultiplied.
+    pub fn set_blend_mode_subpixel_constant_text_color(&self, color: ColorF) {
+        let ColorF { r, g, b, .. } = color;
+        self.gl.blend_color(r, g, b, 1.0);
+
+        let (src_factor, dst_factor) = (gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
+        self.gl.blend_func(src_factor, dst_factor);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+    /// Additive equation with factors `ONE` / `ONE_MINUS_SRC1_COLOR`.
+    pub fn set_blend_mode_subpixel_dual_source(&self) {
+        let (src_factor, dst_factor) = (gl::ONE, gl::ONE_MINUS_SRC1_COLOR);
+        self.gl.blend_func(src_factor, dst_factor);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+
+    /// Reports whether `extension` is found in the extension list stored on
+    /// this device, as decided by the free function `supports_extension`.
+    pub fn supports_extension(&self, extension: &str) -> bool {
+        let known_extensions = &self.extensions;
+        supports_extension(known_extensions, extension)
+    }
+
+    /// Forwards the pending GL debug messages to the `log` macros.
+    ///
+    /// The severity selects the log level (high -> error, medium -> warn,
+    /// low -> info, notification -> debug, anything else -> trace) and the
+    /// message type is printed as a short tag in front of the text.
+    pub fn echo_driver_messages(&self) {
+        for msg in self.gl.get_debug_messages() {
+            let tag = match msg.ty {
+                gl::DEBUG_TYPE_ERROR => "error",
+                gl::DEBUG_TYPE_DEPRECATED_BEHAVIOR => "deprecated",
+                gl::DEBUG_TYPE_UNDEFINED_BEHAVIOR => "undefined",
+                gl::DEBUG_TYPE_PORTABILITY => "portability",
+                gl::DEBUG_TYPE_PERFORMANCE => "perf",
+                gl::DEBUG_TYPE_MARKER => "marker",
+                gl::DEBUG_TYPE_PUSH_GROUP => "group push",
+                gl::DEBUG_TYPE_POP_GROUP => "group pop",
+                gl::DEBUG_TYPE_OTHER => "other",
+                _ => "?",
+            };
+            let log_level = match msg.severity {
+                gl::DEBUG_SEVERITY_HIGH => Level::Error,
+                gl::DEBUG_SEVERITY_MEDIUM => Level::Warn,
+                gl::DEBUG_SEVERITY_LOW => Level::Info,
+                gl::DEBUG_SEVERITY_NOTIFICATION => Level::Debug,
+                _ => Level::Trace,
+            };
+            log!(log_level, "({}) {}", tag, msg.message);
+        }
+    }
+
+    /// Maps an `ImageFormat` to the GL internal format, external format and
+    /// pixel type used for it.
+    ///
+    /// For `BGRA8` the external format is the device's `bgra_format`; the
+    /// internal format is `RGBA` on desktop GL and that same `bgra_format`
+    /// on GLES.
+    fn gl_describe_format(&self, format: ImageFormat) -> FormatDesc {
+        let (internal, external, pixel_type): (gl::GLint, gl::GLuint, gl::GLuint) = match format {
+            ImageFormat::R8 => (gl::RED as gl::GLint, gl::RED, gl::UNSIGNED_BYTE),
+            ImageFormat::BGRA8 => {
+                let external = self.bgra_format;
+                let internal = match self.gl.get_type() {
+                    gl::GlType::Gl => gl::RGBA as gl::GLint,
+                    gl::GlType::Gles => external as gl::GLint,
+                };
+                (internal, external, gl::UNSIGNED_BYTE)
+            }
+            ImageFormat::RGBAF32 => (gl::RGBA32F as gl::GLint, gl::RGBA, gl::FLOAT),
+            ImageFormat::RGBAI32 => (gl::RGBA32I as gl::GLint, gl::RGBA_INTEGER, gl::INT),
+            ImageFormat::RG8 => (gl::RG8 as gl::GLint, gl::RG, gl::UNSIGNED_BYTE),
+        };
+
+        FormatDesc {
+            internal,
+            external,
+            pixel_type,
+        }
+    }
+}
+
+/// The three GL enums that describe a pixel format, as produced by
+/// `Device::gl_describe_format`.
+struct FormatDesc {
+    /// Internal format (e.g. `gl::RGBA32F`).
+    internal: gl::GLint,
+    /// External format of the pixel data (e.g. `gl::RGBA`).
+    external: gl::GLuint,
+    /// Component type of the pixel data (e.g. `gl::UNSIGNED_BYTE`).
+    pixel_type: gl::GLuint,
+}
+
+/// One pending texture update handled by `UploadTarget::update_impl`.
+struct UploadChunk {
+    /// Destination rectangle inside the texture.
+    rect: DeviceUintRect,
+    /// Destination layer; only used for `TEXTURE_2D_ARRAY` targets.
+    layer_index: i32,
+    /// Optional row stride in bytes; converted to `UNPACK_ROW_LENGTH`.
+    stride: Option<u32>,
+    /// Offset of the data in the pixel buffer, or the address of the source
+    /// data when uploading without a pixel buffer.
+    offset: usize,
+}
+
+/// Bookkeeping for the pixel-unpack buffer used by `TextureUploader`.
+struct PixelBuffer {
+    /// GL usage hint passed along when the buffer storage is re-specified.
+    usage: gl::GLenum,
+    /// Number of bytes currently allocated for the buffer.
+    size_allocated: usize,
+    /// Number of bytes already occupied by staged chunks.
+    size_used: usize,
+    // small vector avoids heap allocation for a single chunk
+    chunks: SmallVec<[UploadChunk; 1]>,
+}
+
+impl PixelBuffer {
+    /// Creates the bookkeeping for an empty buffer of `size_allocated`
+    /// bytes: nothing used and no chunks staged.
+    fn new(
+        usage: gl::GLenum,
+        size_allocated: usize,
+    ) -> Self {
+        let chunks = SmallVec::new();
+        Self {
+            usage,
+            size_allocated,
+            size_used: 0,
+            chunks,
+        }
+    }
+}
+
+/// The destination of a texture upload: the GL interface, the texture and
+/// the external format to use for `BGRA8` data.
+struct UploadTarget<'a> {
+    /// GL interface used to issue the upload calls.
+    gl: &'a gl::Gl,
+    /// External GL format used when the texture format is `BGRA8`.
+    bgra_format: gl::GLuint,
+    /// Texture that receives the data.
+    texture: &'a Texture,
+}
+
+/// Upload session for one texture, created by `Device::upload_texture`.
+///
+/// When a pixel buffer is in use, uploads are staged as chunks and the
+/// chunks still pending are submitted when the uploader is dropped.
+pub struct TextureUploader<'a, T> {
+    /// Texture and GL interface the data is written to.
+    target: UploadTarget<'a>,
+    /// Staging state; `None` when uploading without a pixel buffer.
+    buffer: Option<PixelBuffer>,
+    /// Ties the uploader to the element type `T` of the uploaded slices.
+    marker: PhantomData<T>,
+}
+
+impl<'a, T> Drop for TextureUploader<'a, T> {
+    /// Submits every staged chunk and unbinds the pixel-unpack buffer.
+    /// Does nothing when no pixel buffer is in use.
+    fn drop(&mut self) {
+        let staged = match self.buffer.take() {
+            Some(staged) => staged,
+            None => return,
+        };
+
+        for pending in staged.chunks {
+            self.target.update_impl(pending);
+        }
+        self.target.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+    }
+}
+
+impl<'a, T> TextureUploader<'a, T> {
+    /// Uploads `data` into `rect` of layer `layer_index` of the texture.
+    ///
+    /// Without a pixel buffer the texture is updated immediately from
+    /// `data`. With a pixel buffer the data is copied into the buffer and a
+    /// chunk is recorded for later submission. If the data does not fit
+    /// behind what is already staged, the staged chunks are submitted first
+    /// and the buffer is reused from offset 0. If it does not fit into the
+    /// buffer at all, the buffer storage is re-specified with `data`.
+    ///
+    /// `stride` is the optional row stride of `data` in bytes.
+    pub fn upload(
+        &mut self,
+        rect: DeviceUintRect,
+        layer_index: i32,
+        stride: Option<u32>,
+        data: &[T],
+    ) {
+        let buffer = match self.buffer {
+            Some(ref mut buffer) => buffer,
+            None => {
+                // Immediate mode: the "offset" is the address of the data.
+                self.target.update_impl(UploadChunk {
+                    rect, layer_index, stride,
+                    offset: data.as_ptr() as _,
+                });
+                return;
+            }
+        };
+
+        let upload_size = mem::size_of::<T>() * data.len();
+
+        let out_of_space = buffer.size_used + upload_size > buffer.size_allocated;
+        if out_of_space {
+            // flush
+            for chunk in buffer.chunks.drain() {
+                self.target.update_impl(chunk);
+            }
+            buffer.size_used = 0;
+        }
+
+        if upload_size <= buffer.size_allocated {
+            gl::buffer_sub_data(
+                self.target.gl,
+                gl::PIXEL_UNPACK_BUFFER,
+                buffer.size_used as _,
+                data,
+            );
+        } else {
+            gl::buffer_data(
+                self.target.gl,
+                gl::PIXEL_UNPACK_BUFFER,
+                data,
+                buffer.usage,
+            );
+            buffer.size_allocated = upload_size;
+        }
+
+        let offset = buffer.size_used;
+        buffer.chunks.push(UploadChunk {
+            rect, layer_index, stride,
+            offset,
+        });
+        buffer.size_used = offset + upload_size;
+    }
+}
+
+impl<'a> UploadTarget<'a> {
+    /// Issues the GL sub-image update described by `chunk`.
+    ///
+    /// The GL format, bytes per pixel and data type follow from the texture
+    /// format. When the chunk has a stride, `UNPACK_ROW_LENGTH` is set to
+    /// the stride divided by the bytes per pixel for the duration of the
+    /// call and reset to 0 afterwards. Textures with trilinear filtering
+    /// get their mip chain regenerated after the update.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the texture target is none of `TEXTURE_2D_ARRAY`,
+    /// `TEXTURE_2D`, `TEXTURE_RECTANGLE` or `TEXTURE_EXTERNAL_OES`.
+    fn update_impl(&mut self, chunk: UploadChunk) {
+        let (gl_format, bpp, data_type) = match self.texture.format {
+            ImageFormat::R8 => (gl::RED, 1, gl::UNSIGNED_BYTE),
+            ImageFormat::BGRA8 => (self.bgra_format, 4, gl::UNSIGNED_BYTE),
+            ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
+            ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
+            ImageFormat::RGBAI32 => (gl::RGBA_INTEGER, 16, gl::INT),
+        };
+
+        let has_stride = chunk.stride.is_some();
+        if let Some(stride_bytes) = chunk.stride {
+            let row_length: u32 = stride_bytes / bpp;
+            self.gl.pixel_store_i(
+                gl::UNPACK_ROW_LENGTH,
+                row_length as _,
+            );
+        }
+
+        let origin = chunk.rect.origin;
+        let extent = chunk.rect.size;
+        let target = self.texture.target;
+
+        match target {
+            gl::TEXTURE_2D_ARRAY => {
+                self.gl.tex_sub_image_3d_pbo(
+                    target,
+                    0,
+                    origin.x as _,
+                    origin.y as _,
+                    chunk.layer_index,
+                    extent.width as _,
+                    extent.height as _,
+                    1,
+                    gl_format,
+                    data_type,
+                    chunk.offset,
+                );
+            }
+            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
+                self.gl.tex_sub_image_2d_pbo(
+                    target,
+                    0,
+                    origin.x as _,
+                    origin.y as _,
+                    extent.width as _,
+                    extent.height as _,
+                    gl_format,
+                    data_type,
+                    chunk.offset,
+                );
+            }
+            _ => panic!("BUG: Unexpected texture target!"),
+        }
+
+        // If using tri-linear filtering, build the mip-map chain for this texture.
+        if self.texture.filter == TextureFilter::Trilinear {
+            self.gl.generate_mipmap(target);
+        }
+
+        // Reset row length to 0, otherwise the stride would apply to all texture uploads.
+        if has_stride {
+            self.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
+        }
+    }
+}
+
+/// Reinterprets a slice of texels as the bytes that make it up. The result
+/// covers `texels.len() * size_of::<T>()` bytes and borrows from `texels`.
+fn texels_to_u8_slice<T: Texel>(texels: &[T]) -> &[u8] {
+    let byte_len = texels.len() * mem::size_of::<T>();
+    let first_byte = texels.as_ptr() as *const u8;
+    unsafe {
+        slice::from_raw_parts(first_byte, byte_len)
+    }
+}
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/device/mod.rs
@@ -0,0 +1,7 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+mod gl;
+
+pub use self::gl::*;
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -9,20 +9,20 @@ use api::{DevicePixelScale, DeviceUintRe
 use api::{FilterOp, FontInstanceKey, GlyphInstance, GlyphOptions, GlyphRasterSpace, GradientStop};
 use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, LayoutPoint};
 use api::{LayoutPrimitiveInfo, LayoutRect, LayoutSize, LayoutTransform, LayoutVector2D};
 use api::{LineOrientation, LineStyle, LocalClip, NinePatchBorderSource, PipelineId};
 use api::{PropertyBinding, ReferenceFrame, RepeatMode, ScrollFrameDisplayItem, ScrollSensitivity};
 use api::{Shadow, SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, TexelRect};
 use api::{TransformStyle, YuvColorSpace, YuvData};
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore};
-use clip_scroll_node::{ClipScrollNode, NodeType, StickyFrameInfo};
+use clip_scroll_node::{NodeType, SpatialNodeKind, StickyFrameInfo};
 use clip_scroll_tree::{ClipChainIndex, ClipScrollNodeIndex, ClipScrollTree};
-use euclid::{SideOffsets2D, vec2};
-use frame_builder::{FrameBuilder, FrameBuilderConfig};
+use euclid::vec2;
+use frame_builder::{ChasePrimitive, FrameBuilder, FrameBuilderConfig};
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCacheHandle;
 use gpu_types::BrushFlags;
 use hit_test::{HitTestingItem, HitTestingRun};
 use image::simplify_repeated_primitive;
 use internal_types::{FastHashMap, FastHashSet};
 use picture::PictureCompositeMode;
 use prim_store::{BrushClipMaskKind, BrushKind, BrushPrimitive, BrushSegmentDescriptor};
@@ -623,40 +623,42 @@ impl<'a> DisplayListFlattener<'a> {
                     &prim_info,
                     info.wavy_line_thickness,
                     info.orientation,
                     &info.color,
                     info.style,
                 );
             }
             SpecificDisplayItem::Gradient(ref info) => {
-                self.add_gradient(
-                    clip_and_scroll,
+                let brush_kind = self.create_brush_kind_for_gradient(
                     &prim_info,
                     info.gradient.start_point,
                     info.gradient.end_point,
                     item.gradient_stops(),
                     info.gradient.extend_mode,
                     info.tile_size,
                     info.tile_spacing,
                 );
+                let prim = PrimitiveContainer::Brush(BrushPrimitive::new(brush_kind, None));
+                self.add_primitive(clip_and_scroll, &prim_info, Vec::new(), prim);
             }
             SpecificDisplayItem::RadialGradient(ref info) => {
-                self.add_radial_gradient(
-                    clip_and_scroll,
+                let brush_kind = self.create_brush_kind_for_radial_gradient(
                     &prim_info,
                     info.gradient.center,
                     info.gradient.start_offset * info.gradient.radius.width,
                     info.gradient.end_offset * info.gradient.radius.width,
                     info.gradient.radius.width / info.gradient.radius.height,
                     item.gradient_stops(),
                     info.gradient.extend_mode,
                     info.tile_size,
                     info.tile_spacing,
                 );
+                let prim = PrimitiveContainer::Brush(BrushPrimitive::new(brush_kind, None));
+                self.add_primitive(clip_and_scroll, &prim_info, Vec::new(), prim);
             }
             SpecificDisplayItem::BoxShadow(ref box_shadow_info) => {
                 let bounds = box_shadow_info
                     .box_bounds
                     .translate(&reference_frame_relative_offset);
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = bounds;
                 self.add_box_shadow(
@@ -870,16 +872,20 @@ impl<'a> DisplayListFlattener<'a> {
                 let shadow_pic = &mut self.prim_store.pictures[shadow_pic_index.0];
                 shadow_pic.add_primitive(shadow_prim_index, clip_and_scroll);
             }
             self.shadow_stack = shadow_stack;
         }
 
         if container.is_visible() {
             let prim_index = self.create_primitive(info, clip_sources, container);
+            if cfg!(debug_assertions) && ChasePrimitive::LocalRect(info.rect) == self.config.chase_primitive {
+                println!("Chasing {:?}", prim_index);
+                self.prim_store.chase_id = Some(prim_index);
+            }
             self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
             self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
         }
     }
 
     pub fn push_stacking_context(
         &mut self,
         pipeline_id: PipelineId,
@@ -1192,24 +1198,25 @@ impl<'a> DisplayListFlattener<'a> {
         reference_frame_id: ClipId,
         parent_id: Option<ClipId>,
         pipeline_id: PipelineId,
         source_transform: Option<PropertyBinding<LayoutTransform>>,
         source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayoutVector2D,
     ) -> ClipScrollNodeIndex {
         let index = self.id_to_index_mapper.get_node_index(reference_frame_id);
-        let node = ClipScrollNode::new_reference_frame(
-            parent_id.map(|id| self.id_to_index_mapper.get_node_index(id)),
+        let parent_index = parent_id.map(|id| self.id_to_index_mapper.get_node_index(id));
+        self.clip_scroll_tree.add_reference_frame(
+            index,
+            parent_index,
             source_transform,
             source_perspective,
             origin_in_parent_reference_frame,
             pipeline_id,
         );
-        self.clip_scroll_tree.add_node(node, index);
         self.reference_frame_stack.push((reference_frame_id, index));
 
         match parent_id {
             Some(ref parent_id) =>
                 self.id_to_index_mapper.map_to_parent_clip_chain(reference_frame_id, parent_id),
             _ => self.id_to_index_mapper.add_clip_chain(reference_frame_id, ClipChainIndex(0)),
         }
         index
@@ -1222,17 +1229,17 @@ impl<'a> DisplayListFlattener<'a> {
     pub fn setup_viewport_offset(
         &mut self,
         inner_rect: DeviceUintRect,
         device_pixel_scale: DevicePixelScale,
     ) {
+        // Divide the inner rect's origin by the device pixel scale and round the result.
         let viewport_offset = (inner_rect.origin.to_vector().to_f32() / device_pixel_scale).round();
         let root_id = self.clip_scroll_tree.root_reference_frame_index();
         let root_node = &mut self.clip_scroll_tree.nodes[root_id.0];
+        // Only a root node that is a reference frame gets its resolved transform replaced
+        // by a translation of that offset; any other node type is left untouched.
-        if let NodeType::ReferenceFrame(ref mut info) = root_node.node_type {
+        if let NodeType::Spatial { kind: SpatialNodeKind::ReferenceFrame(ref mut info), .. } = root_node.node_type {
             info.resolved_transform =
                 LayoutVector2D::new(viewport_offset.x, viewport_offset.y).into();
         }
     }
 
     pub fn push_root(
         &mut self,
         pipeline_id: PipelineId,
@@ -1285,26 +1292,25 @@ impl<'a> DisplayListFlattener<'a> {
         parent_id: ClipId,
         external_id: Option<ExternalScrollId>,
         pipeline_id: PipelineId,
         frame_rect: &LayoutRect,
         content_size: &LayoutSize,
         scroll_sensitivity: ScrollSensitivity,
     ) -> ClipScrollNodeIndex {
         let node_index = self.id_to_index_mapper.get_node_index(new_node_id);
-        let node = ClipScrollNode::new_scroll_frame(
-            pipeline_id,
+        self.clip_scroll_tree.add_scroll_frame(
+            node_index,
             self.id_to_index_mapper.get_node_index(parent_id),
             external_id,
+            pipeline_id,
             frame_rect,
             content_size,
             scroll_sensitivity,
         );
-
-        self.clip_scroll_tree.add_node(node, node_index);
         self.id_to_index_mapper.map_to_parent_clip_chain(new_node_id, &parent_id);
         node_index
     }
 
+    /// Removes the most recently pushed entry from `reference_frame_stack`,
+    /// discarding the popped value.
     pub fn pop_reference_frame(&mut self) {
         self.reference_frame_stack.pop();
     }
 
@@ -1487,61 +1493,16 @@ impl<'a> DisplayListFlattener<'a> {
     pub fn add_border(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         border_item: &BorderDisplayItem,
         gradient_stops: ItemRange<GradientStop>,
     ) {
         let rect = info.rect;
-        let create_segments = |outset: SideOffsets2D<f32>| {
-            // Calculate the modified rect as specific by border-image-outset
-            let origin = LayoutPoint::new(rect.origin.x - outset.left, rect.origin.y - outset.top);
-            let size = LayoutSize::new(
-                rect.size.width + outset.left + outset.right,
-                rect.size.height + outset.top + outset.bottom,
-            );
-            let rect = LayoutRect::new(origin, size);
-
-            let tl_outer = LayoutPoint::new(rect.origin.x, rect.origin.y);
-            let tl_inner = tl_outer + vec2(border_item.widths.left, border_item.widths.top);
-
-            let tr_outer = LayoutPoint::new(rect.origin.x + rect.size.width, rect.origin.y);
-            let tr_inner = tr_outer + vec2(-border_item.widths.right, border_item.widths.top);
-
-            let bl_outer = LayoutPoint::new(rect.origin.x, rect.origin.y + rect.size.height);
-            let bl_inner = bl_outer + vec2(border_item.widths.left, -border_item.widths.bottom);
-
-            let br_outer = LayoutPoint::new(
-                rect.origin.x + rect.size.width,
-                rect.origin.y + rect.size.height,
-            );
-            let br_inner = br_outer - vec2(border_item.widths.right, border_item.widths.bottom);
-
-            // Build the list of gradient segments
-            vec![
-                // Top left
-                LayoutRect::from_floats(tl_outer.x, tl_outer.y, tl_inner.x, tl_inner.y),
-                // Top right
-                LayoutRect::from_floats(tr_inner.x, tr_outer.y, tr_outer.x, tr_inner.y),
-                // Bottom right
-                LayoutRect::from_floats(br_inner.x, br_inner.y, br_outer.x, br_outer.y),
-                // Bottom left
-                LayoutRect::from_floats(bl_outer.x, bl_inner.y, bl_inner.x, bl_outer.y),
-                // Top
-                LayoutRect::from_floats(tl_inner.x, tl_outer.y, tr_inner.x, tl_inner.y),
-                // Bottom
-                LayoutRect::from_floats(bl_inner.x, bl_inner.y, br_inner.x, bl_outer.y),
-                // Left
-                LayoutRect::from_floats(tl_outer.x, tl_inner.y, tl_inner.x, bl_inner.y),
-                // Right
-                LayoutRect::from_floats(tr_inner.x, tr_inner.y, br_outer.x, br_inner.y),
-            ]
-        };
-
         match border_item.details {
             BorderDetails::NinePatch(ref border) => {
                 // Calculate the modified rect as specific by border-image-outset
                 let origin = LayoutPoint::new(
                     rect.origin.x - border.outset.left,
                     rect.origin.y - border.outset.top,
                 );
                 let size = LayoutSize::new(
@@ -1696,94 +1657,75 @@ impl<'a> DisplayListFlattener<'a> {
                     RepeatMode::Stretch,
                     border.repeat_vertical,
                 );
                 let descriptor = BrushSegmentDescriptor {
                     segments,
                     clip_mask_kind: BrushClipMaskKind::Unknown,
                 };
 
-                let prim = PrimitiveContainer::Brush(match border.source {
+                let brush_kind = match border.source {
                     NinePatchBorderSource::Image(image_key) => {
-                        let source = BorderSource::Image(ImageRequest {
-                            key: image_key,
-                            rendering: ImageRendering::Auto,
-                            tile: None,
-                        });
-
-                        BrushPrimitive::new(
-                            BrushKind::Border {
-                                source
-                            },
-                            Some(descriptor),
+                        BrushKind::Border {
+                            source: BorderSource::Image(ImageRequest {
+                                key: image_key,
+                                rendering: ImageRendering::Auto,
+                                tile: None,
+                            })
+                        }
+                    }
+                    NinePatchBorderSource::Gradient(gradient) => {
+                        self.create_brush_kind_for_gradient(
+                            &info,
+                            gradient.start_point,
+                            gradient.end_point,
+                            gradient_stops,
+                            gradient.extend_mode,
+                            LayoutSize::new(border.height as f32, border.width as f32),
+                            LayoutSize::zero(),
                         )
                     }
-                });
+                    NinePatchBorderSource::RadialGradient(gradient) => {
+                        self.create_brush_kind_for_radial_gradient(
+                            &info,
+                            gradient.center,
+                            gradient.start_offset * gradient.radius.width,
+                            gradient.end_offset * gradient.radius.width,
+                            gradient.radius.width / gradient.radius.height,
+                            gradient_stops,
+                            gradient.extend_mode,
+                            LayoutSize::new(border.height as f32, border.width as f32),
+                            LayoutSize::zero(),
+                        )
+                    }
+                };
 
+                let prim = PrimitiveContainer::Brush(
+                    BrushPrimitive::new(brush_kind, Some(descriptor))
+                );
                 self.add_primitive(clip_and_scroll, info, Vec::new(), prim);
             }
             BorderDetails::Normal(ref border) => {
                 self.add_normal_border(info, border, &border_item.widths, clip_and_scroll);
             }
-            BorderDetails::Gradient(ref border) => for segment in create_segments(border.outset) {
-                let segment_rel = segment.origin - rect.origin;
-                let mut info = info.clone();
-                info.rect = segment;
-
-                self.add_gradient(
-                    clip_and_scroll,
-                    &info,
-                    border.gradient.start_point - segment_rel,
-                    border.gradient.end_point - segment_rel,
-                    gradient_stops,
-                    border.gradient.extend_mode,
-                    segment.size,
-                    LayoutSize::zero(),
-                );
-            },
-            BorderDetails::RadialGradient(ref border) => {
-                for segment in create_segments(border.outset) {
-                    let segment_rel = segment.origin - rect.origin;
-                    let mut info = info.clone();
-                    info.rect = segment;
-
-                    self.add_radial_gradient(
-                        clip_and_scroll,
-                        &info,
-                        border.gradient.center - segment_rel,
-                        border.gradient.start_offset * border.gradient.radius.width,
-                        border.gradient.end_offset * border.gradient.radius.width,
-                        border.gradient.radius.width / border.gradient.radius.height,
-                        gradient_stops,
-                        border.gradient.extend_mode,
-                        segment.size,
-                        LayoutSize::zero(),
-                    );
-                }
-            }
         }
     }
 
-    pub fn add_gradient(
+    pub fn create_brush_kind_for_gradient(
         &mut self,
-        clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         start_point: LayoutPoint,
         end_point: LayoutPoint,
         stops: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
         stretch_size: LayoutSize,
         mut tile_spacing: LayoutSize,
-    ) {
+    ) -> BrushKind {
         let mut prim_rect = info.rect;
         simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut prim_rect);
-        let info = LayoutPrimitiveInfo {
-            rect: prim_rect,
-            .. *info
-        };
 
         // Try to ensure that if the gradient is specified in reverse, then so long as the stops
         // are also supplied in reverse that the rendered result will be equivalent. To do this,
         // a reference orientation for the gradient line must be chosen, somewhat arbitrarily, so
         // just designate the reference orientation as start < end. Aligned gradient rendering
         // manages to produce the same result regardless of orientation, so don't worry about
         // reversing in that case.
         let reverse_stops = start_point.x > end_point.x ||
@@ -1793,78 +1735,56 @@ impl<'a> DisplayListFlattener<'a> {
         // points, it's necessary to reverse the gradient
         // line in some cases.
         let (sp, ep) = if reverse_stops {
             (end_point, start_point)
         } else {
             (start_point, end_point)
         };
 
-        let prim = BrushPrimitive::new(
-            BrushKind::LinearGradient {
-                stops_range: stops,
-                extend_mode,
-                reverse_stops,
-                start_point: sp,
-                end_point: ep,
-                stops_handle: GpuCacheHandle::new(),
-                stretch_size,
-                tile_spacing,
-                visible_tiles: Vec::new(),
-            },
-            None,
-        );
-
-        let prim = PrimitiveContainer::Brush(prim);
-
-        self.add_primitive(clip_and_scroll, &info, Vec::new(), prim);
+        BrushKind::LinearGradient {
+            stops_range: stops,
+            extend_mode,
+            reverse_stops,
+            start_point: sp,
+            end_point: ep,
+            stops_handle: GpuCacheHandle::new(),
+            stretch_size,
+            tile_spacing,
+            visible_tiles: Vec::new(),
+        }
     }
 
-    pub fn add_radial_gradient(
+    /// Builds a `BrushKind::RadialGradient` from the given gradient parameters.
+    ///
+    /// Unlike the `add_radial_gradient` it replaces, this does not add a primitive;
+    /// the caller wraps the returned kind in a `BrushPrimitive` and adds it.
+    pub fn create_brush_kind_for_radial_gradient(
         &mut self,
-        clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         center: LayoutPoint,
         start_radius: f32,
         end_radius: f32,
         ratio_xy: f32,
         stops: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
         stretch_size: LayoutSize,
         mut tile_spacing: LayoutSize,
-    ) {
+    ) -> BrushKind {
+        // NOTE(review): `prim_rect` is still adjusted by `simplify_repeated_primitive`,
+        // but nothing reads it after this change (the `LayoutPrimitiveInfo` rebuilt from
+        // it was removed), so only the `tile_spacing` adjustment reaches the returned
+        // brush kind -- confirm that dropping the rect adjustment is intended.
         let mut prim_rect = info.rect;
         simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut prim_rect);
-        let info = LayoutPrimitiveInfo {
-            rect: prim_rect,
-            .. *info
-        };
 
-        let prim = BrushPrimitive::new(
-            BrushKind::RadialGradient {
-                stops_range: stops,
-                extend_mode,
-                center,
-                start_radius,
-                end_radius,
-                ratio_xy,
-                stops_handle: GpuCacheHandle::new(),
-                stretch_size,
-                tile_spacing,
-                visible_tiles: Vec::new(),
-            },
-            None,
-        );
-
-        self.add_primitive(
-            clip_and_scroll,
-            &info,
-            Vec::new(),
-            PrimitiveContainer::Brush(prim),
-        );
+        BrushKind::RadialGradient {
+            stops_range: stops,
+            extend_mode,
+            center,
+            start_radius,
+            end_radius,
+            ratio_xy,
+            stops_handle: GpuCacheHandle::new(),
+            stretch_size,
+            tile_spacing,
+            visible_tiles: Vec::new(),
+        }
     }
 
     pub fn add_text(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         run_offset: LayoutVector2D,
         prim_info: &LayoutPrimitiveInfo,
         font_instance_key: &FontInstanceKey,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,44 +1,60 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayList, ColorF, DeviceIntPoint, DeviceIntRect, DevicePixelScale};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentLayer, FontRenderMode};
-use api::{LayoutRect, LayoutSize, PipelineId, WorldPoint};
+use api::{LayoutPoint, LayoutRect, LayoutSize, PipelineId, WorldPoint};
 use clip::{ClipChain, ClipStore};
 use clip_scroll_node::{ClipScrollNode};
 use clip_scroll_tree::{ClipScrollNodeIndex, ClipScrollTree};
 use display_list_flattener::{DisplayListFlattener};
 use gpu_cache::GpuCache;
-use gpu_types::{ClipChainRectIndex, ClipScrollNodeData, UvRectKind};
+use gpu_types::{PrimitiveHeaders, TransformData, UvRectKind};
 use hit_test::{HitTester, HitTestingRun};
 use internal_types::{FastHashMap};
 use picture::PictureSurface;
 use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveStore};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTask, RenderTaskId, RenderTaskLocation, RenderTaskTree};
 use resource_cache::{ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, f32};
 use std::sync::Arc;
 use tiling::{Frame, RenderPass, RenderPassKind, RenderTargetContext};
 use tiling::{ScrollbarPrimitive, SpecialRenderPasses};
-use util::{self, MaxRect, WorldToLayoutFastTransform};
+use util::{self, WorldToLayoutFastTransform};
+
+
+/// Selects which primitive, if any, should be "chased" for debugging.
+///
+/// When debug assertions are enabled, the display list flattener compares
+/// `LocalRect(info.rect)` of each visible primitive it creates against the
+/// configured value; on a match it prints the primitive index and stores it
+/// as the primitive store's `chase_id`.
+#[derive(Clone, Copy, Debug, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ChasePrimitive {
+    /// No primitive is chased.
+    Nothing,
+    /// Chase the primitive whose local rect compares equal to this rect.
+    LocalRect(LayoutRect),
+}
+
+impl Default for ChasePrimitive {
+    /// Defaults to `ChasePrimitive::Nothing`, i.e. no primitive is chased.
+    fn default() -> Self {
+        ChasePrimitive::Nothing
+    }
+}
 
 #[derive(Clone, Copy)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct FrameBuilderConfig {
     pub enable_scrollbars: bool,
     pub default_font_render_mode: FontRenderMode,
+    // Dual-source blending is used by frame building only when both the
+    // `is_supported` and `is_enabled` flags below are true.
     pub dual_source_blending_is_supported: bool,
     pub dual_source_blending_is_enabled: bool,
+    /// Debug aid: which primitive (if any) the display list flattener should
+    /// flag for chasing; only consulted when debug assertions are enabled.
+    pub chase_primitive: ChasePrimitive,
 }
 
 /// A builder structure for `tiling::Frame`
 pub struct FrameBuilder {
     screen_rect: DeviceUintRect,
     background_color: Option<ColorF>,
     window_size: DeviceUintSize,
     scene_id: u64,
@@ -51,24 +67,24 @@ pub struct FrameBuilder {
 
+/// Read-only inputs assembled by the frame builder's culling pass.
 pub struct FrameBuildingContext<'a> {
     pub scene_id: u64,
     pub device_pixel_scale: DevicePixelScale,
     pub scene_properties: &'a SceneProperties,
     pub pipelines: &'a FastHashMap<PipelineId, Arc<ScenePipeline>>,
     pub screen_rect: DeviceIntRect,
     pub clip_scroll_tree: &'a ClipScrollTree,
-    pub node_data: &'a [ClipScrollNodeData],
+    /// Transform data slice; the frame builder passes the `transforms` of the
+    /// palette returned by the clip-scroll tree update.
+    pub transforms: &'a [TransformData],
+    /// Set by the culling pass to a rect with origin (-1.0e9, -1.0e9) and
+    /// size 2.0e9 in each dimension.
+    pub max_local_clip: LayoutRect,
 }
 
+/// Mutable borrows assembled by the frame builder's culling pass.
+///
+/// All fields are forwarded from that pass's arguments except `clip_store`,
+/// which is borrowed from the frame builder itself.
 pub struct FrameBuildingState<'a> {
     pub render_tasks: &'a mut RenderTaskTree,
     pub profile_counters: &'a mut FrameProfileCounters,
     pub clip_store: &'a mut ClipStore,
-    pub local_clip_rects: &'a mut Vec<LayoutRect>,
     pub resource_cache: &'a mut ResourceCache,
     pub gpu_cache: &'a mut GpuCache,
     pub special_render_passes: &'a mut SpecialRenderPasses,
 }
 
 pub struct PictureContext<'a> {
     pub pipeline_id: PipelineId,
     pub prim_runs: Vec<PrimitiveRun>,
@@ -94,29 +110,29 @@ impl PictureState {
             local_rect_changed: false,
         }
     }
 }
 
+/// Bundles a borrowed clip chain and scroll node with a local clip rect.
+/// Presumably one instance describes a single primitive run (per the type
+/// name) -- the usage sites are outside this view.
 pub struct PrimitiveRunContext<'a> {
     pub clip_chain: &'a ClipChain,
     pub scroll_node: &'a ClipScrollNode,
-    pub clip_chain_rect_index: ClipChainRectIndex,
+    /// Stored by value; this replaces the former `clip_chain_rect_index`.
+    pub local_clip_rect: LayoutRect,
 }
 
 impl<'a> PrimitiveRunContext<'a> {
+    /// Creates a context that stores the three arguments unchanged.
     pub fn new(
         clip_chain: &'a ClipChain,
         scroll_node: &'a ClipScrollNode,
-        clip_chain_rect_index: ClipChainRectIndex,
+        local_clip_rect: LayoutRect,
     ) -> Self {
         PrimitiveRunContext {
             clip_chain,
             scroll_node,
-            clip_chain_rect_index,
+            local_clip_rect,
         }
     }
 }
 
 impl FrameBuilder {
     pub fn empty() -> Self {
         FrameBuilder {
             hit_testing_runs: Vec::new(),
@@ -127,16 +143,17 @@ impl FrameBuilder {
             window_size: DeviceUintSize::zero(),
             background_color: None,
             scene_id: 0,
             config: FrameBuilderConfig {
                 enable_scrollbars: false,
                 default_font_render_mode: FontRenderMode::Mono,
                 dual_source_blending_is_enabled: true,
                 dual_source_blending_is_supported: false,
+                chase_primitive: ChasePrimitive::Nothing,
             },
         }
     }
 
     pub fn with_display_list_flattener(
         screen_rect: DeviceUintRect,
         background_color: Option<ColorF>,
         window_size: DeviceUintSize,
@@ -164,49 +181,53 @@ impl FrameBuilder {
         pipelines: &FastHashMap<PipelineId, Arc<ScenePipeline>>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         special_render_passes: &mut SpecialRenderPasses,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_scale: DevicePixelScale,
         scene_properties: &SceneProperties,
-        local_clip_rects: &mut Vec<LayoutRect>,
-        node_data: &[ClipScrollNodeData],
+        transforms: &[TransformData],
     ) -> Option<RenderTaskId> {
         profile_scope!("cull");
 
         if self.prim_store.pictures.is_empty() {
             return None
         }
 
         // The root picture is always the first one added.
         let root_clip_scroll_node =
             &clip_scroll_tree.nodes[clip_scroll_tree.root_reference_frame_index().0];
 
         let display_list = &pipelines
             .get(&root_clip_scroll_node.pipeline_id)
             .expect("No display list?")
             .display_list;
 
+        const MAX_CLIP_COORD: f32 = 1.0e9;
+
         let frame_context = FrameBuildingContext {
             scene_id: self.scene_id,
             device_pixel_scale,
             scene_properties,
             pipelines,
             screen_rect: self.screen_rect.to_i32(),
             clip_scroll_tree,
-            node_data,
+            transforms,
+            max_local_clip: LayoutRect::new(
+                LayoutPoint::new(-MAX_CLIP_COORD, -MAX_CLIP_COORD),
+                LayoutSize::new(2.0 * MAX_CLIP_COORD, 2.0 * MAX_CLIP_COORD),
+            ),
         };
 
         let mut frame_state = FrameBuildingState {
             render_tasks,
             profile_counters,
             clip_store: &mut self.clip_store,
-            local_clip_rects,
             resource_cache,
             gpu_cache,
             special_render_passes,
         };
 
         let pic_context = PictureContext {
             pipeline_id: root_clip_scroll_node.pipeline_id,
             prim_runs: mem::replace(&mut self.prim_store.pictures[0].runs, Vec::new()),
@@ -296,30 +317,23 @@ impl FrameBuilder {
         let mut profile_counters = FrameProfileCounters::new();
         profile_counters
             .total_primitives
             .set(self.prim_store.prim_count());
 
         resource_cache.begin_frame(frame_id);
         gpu_cache.begin_frame();
 
-        let mut node_data = Vec::with_capacity(clip_scroll_tree.nodes.len());
-        let total_prim_runs =
-            self.prim_store.pictures.iter().fold(1, |count, pic| count + pic.runs.len());
-        let mut clip_chain_local_clip_rects = Vec::with_capacity(total_prim_runs);
-        clip_chain_local_clip_rects.push(LayoutRect::max_rect());
-
-        clip_scroll_tree.update_tree(
+        let transform_palette = clip_scroll_tree.update_tree(
             &self.screen_rect.to_i32(),
             device_pixel_scale,
             &mut self.clip_store,
             resource_cache,
             gpu_cache,
             pan,
-            &mut node_data,
             scene_properties,
         );
 
         self.update_scroll_bars(clip_scroll_tree, gpu_cache);
 
         let mut render_tasks = RenderTaskTree::new(frame_id);
 
         let screen_size = self.screen_rect.size.to_i32();
@@ -330,18 +344,17 @@ impl FrameBuilder {
             pipelines,
             resource_cache,
             gpu_cache,
             &mut render_tasks,
             &mut special_render_passes,
             &mut profile_counters,
             device_pixel_scale,
             scene_properties,
-            &mut clip_chain_local_clip_rects,
-            &node_data,
+            &transform_palette.transforms,
         );
 
         resource_cache.block_until_all_resources_added(gpu_cache,
                                                        &mut render_tasks,
                                                        texture_cache_profile);
 
         let mut passes = vec![
             special_render_passes.alpha_glyph_pass,
@@ -364,35 +377,37 @@ impl FrameBuilder {
                 main_render_task_id,
                 required_pass_count - 1,
                 &mut passes[2..],
             );
         }
 
         let mut deferred_resolves = vec![];
         let mut has_texture_cache_tasks = false;
+        let mut prim_headers = PrimitiveHeaders::new();
         let use_dual_source_blending = self.config.dual_source_blending_is_enabled &&
                                        self.config.dual_source_blending_is_supported;
 
         for pass in &mut passes {
             let mut ctx = RenderTargetContext {
                 device_pixel_scale,
                 prim_store: &self.prim_store,
                 resource_cache,
                 clip_scroll_tree,
                 use_dual_source_blending,
-                node_data: &node_data,
+                transforms: &transform_palette,
             };
 
             pass.build(
                 &mut ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
+                &mut prim_headers,
             );
 
             if let RenderPassKind::OffScreen { ref texture_cache, .. } = pass.kind {
                 has_texture_cache_tasks |= !texture_cache.is_empty();
             }
         }
 
         let gpu_cache_frame_id = gpu_cache.end_frame(gpu_cache_profile);
@@ -404,23 +419,23 @@ impl FrameBuilder {
         Frame {
             window_size: self.window_size,
             inner_rect: self.screen_rect,
             device_pixel_ratio: device_pixel_scale.0,
             background_color: self.background_color,
             layer,
             profile_counters,
             passes,
-            node_data,
-            clip_chain_local_clip_rects,
+            transform_palette: transform_palette.transforms,
             render_tasks,
             deferred_resolves,
             gpu_cache_frame_id,
             has_been_rendered: false,
             has_texture_cache_tasks,
+            prim_headers,
         }
     }
 
     pub fn create_hit_tester(&mut self, clip_scroll_tree: &ClipScrollTree) -> HitTester {
         HitTester::new(
             &self.hit_testing_runs,
             clip_scroll_tree,
             &self.clip_store
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -1,35 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{DevicePoint, DeviceSize, DeviceRect, LayoutToWorldTransform};
+use api::{DevicePoint, DeviceSize, DeviceRect, LayoutRect, LayoutToWorldTransform};
 use api::{PremultipliedColorF, WorldToLayoutTransform};
+use clip_scroll_tree::TransformIndex;
 use gpu_cache::{GpuCacheAddress, GpuDataRequest};
-use prim_store::{VECS_PER_SEGMENT, EdgeAaSegmentMask};
+use prim_store::{EdgeAaSegmentMask};
 use render_task::RenderTaskAddress;
-use renderer::MAX_VERTEX_TEXTURE_WIDTH;
+use util::{MatrixHelpers, TransformedRectKind};
 
 // Contains type that must exactly match the same structures declared in GLSL.
 
-const INT_BITS: usize = 31; //TODO: convert to unsigned
-const CLIP_CHAIN_RECT_BITS: usize = 22;
-const SEGMENT_BITS: usize = INT_BITS - CLIP_CHAIN_RECT_BITS;
-// The guard ensures (at compile time) that the designated number of bits cover
-// the maximum supported segment count for the texture width.
-const _SEGMENT_GUARD: usize = (1 << SEGMENT_BITS) * VECS_PER_SEGMENT - MAX_VERTEX_TEXTURE_WIDTH;
-const EDGE_FLAG_BITS: usize = 4;
-const BRUSH_FLAG_BITS: usize = 4;
-const CLIP_SCROLL_INDEX_BITS: usize = INT_BITS - EDGE_FLAG_BITS - BRUSH_FLAG_BITS;
-
 #[derive(Copy, Clone, Debug)]
 #[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ZBufferId(i32);
 
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ZBufferIdGenerator {
     next: i32,
 }
 
 impl ZBufferIdGenerator {
     pub fn new() -> Self {
         ZBufferIdGenerator {
             next: 0
@@ -114,76 +110,150 @@ pub struct BorderInstance {
 /// Could be an image or a rectangle, which defines the
 /// way `address` is treated.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct ClipMaskInstance {
     pub render_task_address: RenderTaskAddress,
-    pub scroll_node_data_index: ClipScrollNodeIndex,
+    pub transform_id: TransformPaletteId,
     pub segment: i32,
     pub clip_data_address: GpuCacheAddress,
     pub resource_address: GpuCacheAddress,
 }
 
 /// A border corner dot or dash drawn into the clipping mask.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct ClipMaskBorderCornerDotDash {
     pub clip_mask_instance: ClipMaskInstance,
     pub dot_dash_data: [f32; 8],
 }
 
-// 32 bytes per instance should be enough for anyone!
+// 16 bytes per instance should be enough for anyone!
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveInstance {
-    data: [i32; 8],
+    data: [i32; 4],
+}
+
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaderIndex(pub i32);
+
+#[derive(Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaders {
+    // The integer-type headers for a primitive.
+    pub headers_int: Vec<PrimitiveHeaderI>,
+    // The float-type headers for a primitive.
+    pub headers_float: Vec<PrimitiveHeaderF>,
+    // Used to generate a unique z-buffer value per primitive.
+    pub z_generator: ZBufferIdGenerator,
+}
+
+impl PrimitiveHeaders {
+    pub fn new() -> PrimitiveHeaders {
+        PrimitiveHeaders {
+            headers_int: Vec::new(),
+            headers_float: Vec::new(),
+            z_generator: ZBufferIdGenerator::new(),
+        }
+    }
+
+    // Add a new primitive header.
+    pub fn push(
+        &mut self,
+        prim_header: &PrimitiveHeader,
+        user_data: [i32; 3],
+    ) -> PrimitiveHeaderIndex {
+        debug_assert_eq!(self.headers_int.len(), self.headers_float.len());
+        let id = self.headers_float.len();
+
+        self.headers_float.push(PrimitiveHeaderF {
+            local_rect: prim_header.local_rect,
+            local_clip_rect: prim_header.local_clip_rect,
+        });
+
+        self.headers_int.push(PrimitiveHeaderI {
+            z: self.z_generator.next(),
+            task_address: prim_header.task_address,
+            specific_prim_address: prim_header.specific_prim_address.as_int(),
+            clip_task_address: prim_header.clip_task_address,
+            transform_id: prim_header.transform_id,
+            user_data,
+        });
+
+        PrimitiveHeaderIndex(id as i32)
+    }
+}
+
+// This is a convenience type used to make it easier to pass
+// the common parts around during batching.
+pub struct PrimitiveHeader {
+    pub local_rect: LayoutRect,
+    pub local_clip_rect: LayoutRect,
+    pub task_address: RenderTaskAddress,
+    pub specific_prim_address: GpuCacheAddress,
+    pub clip_task_address: RenderTaskAddress,
+    pub transform_id: TransformPaletteId,
+}
+
+// f32 parts of a primitive header
+#[derive(Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaderF {
+    pub local_rect: LayoutRect,
+    pub local_clip_rect: LayoutRect,
+}
+
+// i32 parts of a primitive header
+// TODO(gw): Compress parts of these down to u16
+#[derive(Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveHeaderI {
+    pub z: ZBufferId,
+    pub task_address: RenderTaskAddress,
+    pub specific_prim_address: i32,
+    pub clip_task_address: RenderTaskAddress,
+    pub transform_id: TransformPaletteId,
+    pub user_data: [i32; 3],
 }
 
 pub struct GlyphInstance {
-    pub specific_prim_address: GpuCacheAddress,
-    pub task_address: RenderTaskAddress,
-    pub clip_task_address: RenderTaskAddress,
-    pub clip_chain_rect_index: ClipChainRectIndex,
-    pub scroll_id: ClipScrollNodeIndex,
-    pub z: ZBufferId,
+    pub prim_header_index: PrimitiveHeaderIndex,
 }
 
 impl GlyphInstance {
     pub fn new(
-        specific_prim_address: GpuCacheAddress,
-        task_address: RenderTaskAddress,
-        clip_task_address: RenderTaskAddress,
-        clip_chain_rect_index: ClipChainRectIndex,
-        scroll_id: ClipScrollNodeIndex,
-        z: ZBufferId,
+        prim_header_index: PrimitiveHeaderIndex,
     ) -> Self {
         GlyphInstance {
-            specific_prim_address,
-            task_address,
-            clip_task_address,
-            clip_chain_rect_index,
-            scroll_id,
-            z,
+            prim_header_index,
         }
     }
 
+    // TODO(gw): Some of these fields can be moved to the primitive
+    //           header since they are constant, and some can be
+    //           compressed to a smaller size.
     pub fn build(&self, data0: i32, data1: i32, data2: i32) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
-                self.specific_prim_address.as_int(),
-                self.task_address.0 as i32 | (self.clip_task_address.0 as i32) << 16,
-                self.clip_chain_rect_index.0 as i32,
-                self.scroll_id.0 as i32,
-                self.z.0,
+                self.prim_header_index.0 as i32,
                 data0,
                 data1,
                 data2,
             ],
         }
     }
 }
 
@@ -213,20 +283,16 @@ impl SplitCompositeInstance {
 impl From<SplitCompositeInstance> for PrimitiveInstance {
     fn from(instance: SplitCompositeInstance) -> Self {
         PrimitiveInstance {
             data: [
                 instance.task_address.0 as i32,
                 instance.src_task_address.0 as i32,
                 instance.polygons_address.as_int(),
                 instance.z.0,
-                0,
-                0,
-                0,
-                0,
             ],
         }
     }
 }
 
 bitflags! {
     /// Flags that define how the common brush shader
     /// code should process this instance.
@@ -238,90 +304,151 @@ bitflags! {
         const SEGMENT_RELATIVE = 0x2;
         /// Repeat UVs horizontally.
         const SEGMENT_REPEAT_X = 0x4;
         /// Repeat UVs vertically.
         const SEGMENT_REPEAT_Y = 0x8;
     }
 }
 
-// TODO(gw): While we are converting things over, we
-//           need to have the instance be the same
-//           size as an old PrimitiveInstance. In the
-//           future, we can compress this vertex
-//           format a lot - e.g. z, render task
-//           addresses etc can reasonably become
-//           a u16 type.
+// TODO(gw): Some of these fields can be moved to the primitive
+//           header since they are constant, and some can be
+//           compressed to a smaller size.
 #[repr(C)]
 pub struct BrushInstance {
-    pub picture_address: RenderTaskAddress,
-    pub prim_address: GpuCacheAddress,
-    pub clip_chain_rect_index: ClipChainRectIndex,
-    pub scroll_id: ClipScrollNodeIndex,
+    pub prim_header_index: PrimitiveHeaderIndex,
     pub clip_task_address: RenderTaskAddress,
-    pub z: ZBufferId,
     pub segment_index: i32,
     pub edge_flags: EdgeAaSegmentMask,
     pub brush_flags: BrushFlags,
-    pub user_data: [i32; 3],
 }
 
 impl From<BrushInstance> for PrimitiveInstance {
     fn from(instance: BrushInstance) -> Self {
-        debug_assert_eq!(0, instance.clip_chain_rect_index.0 >> CLIP_CHAIN_RECT_BITS);
-        debug_assert_eq!(0, instance.scroll_id.0 >> CLIP_SCROLL_INDEX_BITS);
-        debug_assert_eq!(0, instance.segment_index >> SEGMENT_BITS);
         PrimitiveInstance {
             data: [
-                instance.picture_address.0 as i32 | (instance.clip_task_address.0 as i32) << 16,
-                instance.prim_address.as_int(),
-                instance.clip_chain_rect_index.0 as i32 | (instance.segment_index << CLIP_CHAIN_RECT_BITS),
-                instance.z.0,
-                instance.scroll_id.0 as i32 |
-                    ((instance.edge_flags.bits() as i32) << CLIP_SCROLL_INDEX_BITS) |
-                    ((instance.brush_flags.bits() as i32) << (CLIP_SCROLL_INDEX_BITS + EDGE_FLAG_BITS)),
-                instance.user_data[0],
-                instance.user_data[1],
-                instance.user_data[2],
+                instance.prim_header_index.0,
+                instance.clip_task_address.0 as i32,
+                instance.segment_index |
+                ((instance.edge_flags.bits() as i32) << 16) |
+                ((instance.brush_flags.bits() as i32) << 24),
+                0,
             ]
         }
     }
 }
 
+// Represents the information about a transform palette
+// entry that is passed to shaders. It includes an index
+// into the transform palette, and a set of flags. The
+// only flag currently used determines whether the
+// transform is axis-aligned (and thus should have
+// pixel snapping applied).
 #[derive(Copy, Debug, Clone, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
-pub struct ClipScrollNodeIndex(pub u32);
+pub struct TransformPaletteId(pub u32);
+
+impl TransformPaletteId {
+    // Get the palette ID for an identity transform.
+    pub fn identity() -> TransformPaletteId {
+        TransformPaletteId(0)
+    }
 
+    // Extract the transform kind from the id.
+    pub fn transform_kind(&self) -> TransformedRectKind {
+        if (self.0 >> 24) == 0 {
+            TransformedRectKind::AxisAligned
+        } else {
+            TransformedRectKind::Complex
+        }
+    }
+}
+
+// The GPU data payload for a transform palette entry.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
-pub struct ClipScrollNodeData {
+pub struct TransformData {
     pub transform: LayoutToWorldTransform,
     pub inv_transform: WorldToLayoutTransform,
-    pub transform_kind: f32,
-    pub padding: [f32; 3],
 }
 
-impl ClipScrollNodeData {
+impl TransformData {
     pub fn invalid() -> Self {
-        ClipScrollNodeData {
+        TransformData {
             transform: LayoutToWorldTransform::identity(),
             inv_transform: WorldToLayoutTransform::identity(),
-            transform_kind: 0.0,
-            padding: [0.0; 3],
         }
     }
 }
 
-#[derive(Copy, Debug, Clone, PartialEq)]
-#[repr(C)]
-pub struct ClipChainRectIndex(pub usize);
+// Extra data stored about each transform palette entry.
+pub struct TransformMetadata {
+    pub transform_kind: TransformedRectKind,
+}
+
+// Stores a contiguous list of TransformData structs, that
+// are ready for upload to the GPU.
+// TODO(gw): For now, this only stores the complete local
+//           to world transform for each spatial node. In
+//           the future, the transform palette will support
+//           specifying a coordinate system that the transform
+//           should be relative to.
+pub struct TransformPalette {
+    pub transforms: Vec<TransformData>,
+    metadata: Vec<TransformMetadata>,
+}
+
+impl TransformPalette {
+    pub fn new(spatial_node_count: usize) -> TransformPalette {
+        TransformPalette {
+            transforms: Vec::with_capacity(spatial_node_count),
+            metadata: Vec::with_capacity(spatial_node_count),
+        }
+    }
+
+    // Set the local -> world transform for a given spatial
+    // node in the transform palette.
+    pub fn set(
+        &mut self,
+        index: TransformIndex,
+        data: TransformData,
+    ) {
+        let index = index.0 as usize;
+
+        // Pad the vectors out if they are not long enough to
+        // account for this index. This can occur, for instance,
+        // when we stop recursing down the CST due to encountering
+        // a node with an invalid transform.
+        while index >= self.transforms.len() {
+            self.transforms.push(TransformData::invalid());
+            self.metadata.push(TransformMetadata {
+                transform_kind: TransformedRectKind::AxisAligned,
+            });
+        }
+
+        // Store the transform itself, along with metadata about it.
+        self.metadata[index] = TransformMetadata {
+            transform_kind: data.transform.transform_kind(),
+        };
+        self.transforms[index] = data;
+    }
+
+    // Get a transform palette id for the given spatial node.
+    // TODO(gw): In the future, it will be possible to specify
+    //           a coordinate system id here, to allow retrieving
+    //           transforms in the local space of a given spatial node.
+    pub fn get_id(&self, index: TransformIndex) -> TransformPaletteId {
+        let transform_kind = self.metadata[index.0 as usize].transform_kind as u32;
+        TransformPaletteId(index.0 | (transform_kind << 24))
+    }
+}
 
 // Texture cache resources can be either a simple rect, or define
 // a polygon within a rect by specifying a UV coordinate for each
 // corner. This is useful for rendering screen-space rasterized
 // off-screen surfaces.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -177,15 +177,16 @@ extern crate base64;
 #[cfg(all(feature = "capture", feature = "png"))]
 extern crate png;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::{build_shader_strings, ReadPixelsFormat, UploadMethod, VertexUsageHint};
 pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver, ProgramSources};
+pub use frame_builder::ChasePrimitive;
 pub use renderer::{AsyncPropertySampler, CpuProfile, DebugFlags, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource, GpuProfile};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, PipelineInfo, Renderer, RendererOptions};
 pub use renderer::{RendererStats, SceneBuilderHooks, ThreadListener};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
 pub use resource_cache::intersect_for_tile;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -271,17 +271,17 @@ impl PicturePrimitive {
         }
     }
 
     // Disallow subpixel AA if an intermediate surface is needed.
     pub fn allow_subpixel_aa(&self) -> bool {
         self.can_draw_directly_to_parent_surface()
     }
 
-    pub fn prepare_for_render_inner(
+    pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_metadata: &mut PrimitiveMetadata,
         prim_run_context: &PrimitiveRunContext,
         mut pic_state_for_children: PictureState,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
@@ -484,26 +484,19 @@ impl PicturePrimitive {
                     // TODO(gw): This is very hacky code below! It stores an extra
                     //           brush primitive below for the special case of a
                     //           drop-shadow where we need a different local
                     //           rect for the shadow. To tidy this up in future,
                     //           we could consider abstracting the code in prim_store.rs
                     //           that writes a brush primitive header.
 
                     // Basic brush primitive header is (see end of prepare_prim_for_render_inner in prim_store.rs)
-                    //  local_rect
-                    //  clip_rect
                     //  [brush specific data]
                     //  [segment_rect, segment data]
                     let shadow_rect = prim_metadata.local_rect.translate(&offset);
-                    let shadow_clip_rect = prim_metadata.local_clip_rect.translate(&offset);
-
-                    // local_rect, clip_rect
-                    request.push(shadow_rect);
-                    request.push(shadow_clip_rect);
 
                     // ImageBrush colors
                     request.push(color.premultiplied());
                     request.push(PremultipliedColorF::WHITE);
                     request.push([
                         prim_metadata.local_rect.size.width,
                         prim_metadata.local_rect.size.height,
                         0.0,
@@ -587,37 +580,16 @@ impl PicturePrimitive {
                 );
 
                 let render_task_id = frame_state.render_tasks.add(picture_task);
                 pic_state.tasks.push(render_task_id);
                 self.surface = Some(PictureSurface::RenderTask(render_task_id));
             }
         }
     }
-
-    pub fn prepare_for_render(
-        &mut self,
-        prim_index: PrimitiveIndex,
-        prim_metadata: &mut PrimitiveMetadata,
-        prim_run_context: &PrimitiveRunContext,
-        pic_state_for_children: PictureState,
-        pic_state: &mut PictureState,
-        frame_context: &FrameBuildingContext,
-        frame_state: &mut FrameBuildingState,
-    ) {
-        self.prepare_for_render_inner(
-            prim_index,
-            prim_metadata,
-            prim_run_context,
-            pic_state_for_children,
-            pic_state,
-            frame_context,
-            frame_state,
-        );
-    }
 }
 
 // Calculate a single screen-space UV for a picture.
 fn calculate_screen_uv(
     local_pos: &LayoutPoint,
     clip_scroll_node: &ClipScrollNode,
     rendered_rect: &DeviceRect,
     device_pixel_scale: DevicePixelScale,
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{AlphaType, BorderRadius, BoxShadowClipMode, BuiltDisplayList, ClipMode, ColorF, ComplexClipRegion};
+use api::{AlphaType, BorderRadius, BoxShadowClipMode, BuiltDisplayList, ClipMode, ColorF};
 use api::{DeviceIntRect, DeviceIntSize, DevicePixelScale, Epoch, ExtendMode};
 use api::{FilterOp, GlyphInstance, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, TileOffset};
 use api::{GlyphRasterSpace, LayoutPoint, LayoutRect, LayoutSize, LayoutToWorldTransform, LayoutVector2D};
 use api::{PipelineId, PremultipliedColorF, PropertyBinding, Shadow, YuvColorSpace, YuvFormat, DeviceIntSideOffsets};
 use api::{BorderWidths, LayoutToWorldScale, NormalBorder};
 use app_units::Au;
 use border::{BorderCacheKey, BorderRenderTaskInfo};
 use box_shadow::BLUR_SAMPLE_SCALE;
@@ -15,17 +15,17 @@ use clip_scroll_tree::{ClipChainIndex, C
 use clip_scroll_node::ClipScrollNode;
 use clip::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipChainNodeRef, ClipSource};
 use clip::{ClipSourcesHandle, ClipWorkItem};
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
 use frame_builder::PrimitiveRunContext;
 use glyph_rasterizer::{FontInstance, FontTransform, GlyphKey, FONT_SIZE_LIMIT};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
-use gpu_types::{BrushFlags, ClipChainRectIndex};
+use gpu_types::BrushFlags;
 use image::{for_each_tile, for_each_repetition};
 use picture::{PictureCompositeMode, PictureId, PicturePrimitive};
 #[cfg(debug_assertions)]
 use render_backend::FrameId;
 use render_task::{BlitSource, RenderTask, RenderTaskCacheKey};
 use render_task::{RenderTaskCacheKeyKind, RenderTaskId, RenderTaskCacheEntryHandle};
 use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
 use resource_cache::{ImageProperties, ImageRequest};
@@ -45,28 +45,39 @@ pub struct ScrollNodeAndClipChain {
     pub scroll_node_id: ClipScrollNodeIndex,
     pub clip_chain_index: ClipChainIndex,
 }
 
 impl ScrollNodeAndClipChain {
     pub fn new(
         scroll_node_id: ClipScrollNodeIndex,
         clip_chain_index: ClipChainIndex
-    ) -> ScrollNodeAndClipChain {
+    ) -> Self {
         ScrollNodeAndClipChain { scroll_node_id, clip_chain_index }
     }
 }
 
 #[derive(Debug)]
 pub struct PrimitiveRun {
     pub base_prim_index: PrimitiveIndex,
     pub count: usize,
     pub clip_and_scroll: ScrollNodeAndClipChain,
 }
 
+impl PrimitiveRun {
+    pub fn is_chasing(&self, index: Option<PrimitiveIndex>) -> bool {
+        match index {
+            Some(id) if cfg!(debug_assertions) => {
+                self.base_prim_index <= id && id.0 < self.base_prim_index.0 + self.count
+            }
+            _ => false,
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
     pub is_opaque: bool,
 }
 
 impl PrimitiveOpacity {
     pub fn opaque() -> PrimitiveOpacity {
         PrimitiveOpacity { is_opaque: true }
@@ -169,17 +180,21 @@ pub struct PrimitiveMetadata {
     pub gpu_location: GpuCacheHandle,
     pub clip_task_id: Option<RenderTaskId>,
 
     // TODO(gw): In the future, we should just pull these
     //           directly from the DL item, instead of
     //           storing them here.
     pub local_rect: LayoutRect,
     pub local_clip_rect: LayoutRect,
-    pub clip_chain_rect_index: ClipChainRectIndex,
+
+    // The current combined local clip for this primitive, from
+    // the primitive local clip above and the current clip chain.
+    pub combined_local_clip_rect: LayoutRect,
+
     pub is_backface_visible: bool,
     pub screen_rect: Option<ScreenRect>,
 
     /// A tag used to identify this primitive outside of WebRender. This is
     /// used for returning useful data during hit testing.
     pub tag: Option<ItemTag>,
 
     /// The last frame ID (of the `RenderTaskTree`) this primitive
@@ -229,21 +244,25 @@ impl OpacityBinding {
     }
 }
 
 #[derive(Debug)]
 pub struct VisibleImageTile {
     pub tile_offset: TileOffset,
     pub handle: GpuCacheHandle,
     pub edge_flags: EdgeAaSegmentMask,
+    pub local_rect: LayoutRect,
+    pub local_clip_rect: LayoutRect,
 }
 
 #[derive(Debug)]
 pub struct VisibleGradientTile {
     pub handle: GpuCacheHandle,
+    pub local_rect: LayoutRect,
+    pub local_clip_rect: LayoutRect,
 }
 
 #[derive(Debug)]
 pub enum BorderSource {
     Image(ImageRequest),
     Border {
         handle: Option<RenderTaskCacheEntryHandle>,
         cache_key: BorderCacheKey,
@@ -413,24 +432,24 @@ pub struct BrushPrimitive {
     pub kind: BrushKind,
     pub segment_desc: Option<BrushSegmentDescriptor>,
 }
 
 impl BrushPrimitive {
     pub fn new(
         kind: BrushKind,
         segment_desc: Option<BrushSegmentDescriptor>,
-    ) -> BrushPrimitive {
+    ) -> Self {
         BrushPrimitive {
             kind,
             segment_desc,
         }
     }
 
-    pub fn new_picture(pic_index: PictureIndex) -> BrushPrimitive {
+    pub fn new_picture(pic_index: PictureIndex) -> Self {
         BrushPrimitive {
             kind: BrushKind::Picture {
                 pic_index,
             },
             segment_desc: None,
         }
     }
 
@@ -714,17 +733,16 @@ impl<'a> GradientGpuBlockBuilder<'a> {
 
                 cur_color = next_color;
             }
             if cur_idx != GRADIENT_DATA_TABLE_END {
                 error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
                 self.fill_colors(cur_idx, GRADIENT_DATA_TABLE_END, &PremultipliedColorF::WHITE, &cur_color, &mut entries);
             }
 
-
             // Fill in the last entry with the last color stop
             self.fill_colors(
                 GRADIENT_DATA_LAST_STOP,
                 GRADIENT_DATA_LAST_STOP + 1,
                 &cur_color,
                 &cur_color,
                 &mut entries,
             );
@@ -734,98 +752,135 @@ impl<'a> GradientGpuBlockBuilder<'a> {
             request.push(entry.start_color);
             request.push(entry.end_color);
         }
     }
 }
 
 #[derive(Debug, Clone)]
 pub struct TextRunPrimitiveCpu {
-    pub font: FontInstance,
+    pub specified_font: FontInstance,
+    pub used_font: FontInstance,
     pub offset: LayoutVector2D,
     pub glyph_range: ItemRange<GlyphInstance>,
     pub glyph_keys: Vec<GlyphKey>,
     pub glyph_gpu_blocks: Vec<GpuBlockData>,
-    pub glyph_transform: (DevicePixelScale, FontTransform),
     pub shadow: bool,
     pub glyph_raster_space: GlyphRasterSpace,
 }
 
 impl TextRunPrimitiveCpu {
     pub fn new(
         font: FontInstance,
         offset: LayoutVector2D,
         glyph_range: ItemRange<GlyphInstance>,
         glyph_keys: Vec<GlyphKey>,
         shadow: bool,
         glyph_raster_space: GlyphRasterSpace,
     ) -> Self {
         TextRunPrimitiveCpu {
-            font,
+            specified_font: font.clone(),
+            used_font: font,
             offset,
             glyph_range,
             glyph_keys,
             glyph_gpu_blocks: Vec::new(),
-            glyph_transform: (DevicePixelScale::new(1.0), FontTransform::identity()),
             shadow,
             glyph_raster_space,
         }
     }
 
-    pub fn get_font(
-        &self,
+    pub fn update_font_instance(
+        &mut self,
         device_pixel_scale: DevicePixelScale,
-        transform: LayoutToWorldTransform,
-    ) -> FontInstance {
-        let mut font = self.font.clone();
-        font.size = font.size.scale_by(device_pixel_scale.0);
+        transform: &LayoutToWorldTransform,
+        allow_subpixel_aa: bool,
+    ) -> bool {
+        // Get the current font size in device pixels
+        let device_font_size = self.specified_font.size.scale_by(device_pixel_scale.0);
+
+        // Determine if rasterizing glyphs in local or screen space.
         // Only support transforms that can be coerced to simple 2D transforms.
-        if transform.has_perspective_component() ||
+        let transform_glyphs = if transform.has_perspective_component() ||
            !transform.has_2d_inverse() ||
            // Font sizes larger than the limit need to be scaled, thus can't use subpixels.
-           transform.exceeds_2d_scale(FONT_SIZE_LIMIT / font.size.to_f64_px()) ||
+           transform.exceeds_2d_scale(FONT_SIZE_LIMIT / device_font_size.to_f64_px()) ||
            // Otherwise, ensure the font is rasterized in screen-space.
            self.glyph_raster_space != GlyphRasterSpace::Screen {
-            font.disable_subpixel_aa();
-            font.disable_subpixel_position();
+            false
         } else {
+            true
+        };
+
+        // Get the font transform matrix (skew / scale) from the complete transform.
+        let font_transform = if transform_glyphs {
             // Quantize the transform to minimize thrashing of the glyph cache.
-            font.transform = FontTransform::from(&transform).quantize();
+            FontTransform::from(transform).quantize()
+        } else {
+            FontTransform::identity()
+        };
+
+        // If the transform or device size is different, then the caller of
+        // this method needs to know to rebuild the glyphs.
+        let cache_dirty =
+            self.used_font.transform != font_transform ||
+            self.used_font.size != device_font_size;
+
+        // Construct used font instance from the specified font instance
+        self.used_font = FontInstance {
+            transform: font_transform,
+            size: device_font_size,
+            ..self.specified_font.clone()
+        };
+
+        // If subpixel AA is disabled due to the backing surface the glyphs
+        // are being drawn onto, disable it (unless we are using the
+        // special subpixel mode that estimates background color).
+        if !allow_subpixel_aa && self.specified_font.bg_color.a == 0 {
+            self.used_font.disable_subpixel_aa();
         }
-        font
+
+        // If using local space glyphs, we don't want subpixel AA
+        // or positioning.
+        if !transform_glyphs {
+            self.used_font.disable_subpixel_aa();
+            self.used_font.disable_subpixel_position();
+        }
+
+        cache_dirty
     }
 
     fn prepare_for_render(
         &mut self,
         device_pixel_scale: DevicePixelScale,
-        transform: LayoutToWorldTransform,
+        transform: &LayoutToWorldTransform,
         allow_subpixel_aa: bool,
         display_list: &BuiltDisplayList,
         frame_building_state: &mut FrameBuildingState,
     ) {
-        if !allow_subpixel_aa && self.font.bg_color.a == 0 {
-            self.font.disable_subpixel_aa();
-        }
-
-        let font = self.get_font(device_pixel_scale, transform);
+        let cache_dirty = self.update_font_instance(
+            device_pixel_scale,
+            transform,
+            allow_subpixel_aa,
+        );
 
         // Cache the glyph positions, if not in the cache already.
         // TODO(gw): In the future, remove `glyph_instances`
         //           completely, and just reference the glyphs
         //           directly from the display list.
-        if self.glyph_keys.is_empty() || self.glyph_transform != (device_pixel_scale, font.transform) {
-            let subpx_dir = font.get_subpx_dir();
+        if self.glyph_keys.is_empty() || cache_dirty {
+            let subpx_dir = self.used_font.get_subpx_dir();
             let src_glyphs = display_list.get(self.glyph_range);
 
             // TODO(gw): If we support chunks() on AuxIter
             //           in the future, this code below could
             //           be much simpler...
             let mut gpu_block = [0.0; 4];
             for (i, src) in src_glyphs.enumerate() {
-                let world_offset = font.transform.transform(&src.point);
+                let world_offset = self.used_font.transform.transform(&src.point);
                 let device_offset = device_pixel_scale.transform_point(&world_offset);
                 let key = GlyphKey::new(src.index, device_offset, subpx_dir);
                 self.glyph_keys.push(key);
 
                 // Two glyphs are packed per GPU block.
 
                 if (i & 1) == 0 {
                     gpu_block[0] = src.point.x;
@@ -837,32 +892,30 @@ impl TextRunPrimitiveCpu {
                 }
             }
 
             // Ensure the last block is added in the case
             // of an odd number of glyphs.
             if (self.glyph_keys.len() & 1) != 0 {
                 self.glyph_gpu_blocks.push(gpu_block.into());
             }
-
-            self.glyph_transform = (device_pixel_scale, font.transform);
         }
 
         frame_building_state.resource_cache
-                            .request_glyphs(font,
+                            .request_glyphs(self.used_font.clone(),
                                             &self.glyph_keys,
                                             frame_building_state.gpu_cache,
                                             frame_building_state.render_tasks,
                                             frame_building_state.special_render_passes);
     }
 
     fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
-        request.push(ColorF::from(self.font.color).premultiplied());
+        request.push(ColorF::from(self.used_font.color).premultiplied());
         // this is the only case where we need to provide plain color to GPU
-        let bg_color = ColorF::from(self.font.bg_color);
+        let bg_color = ColorF::from(self.used_font.bg_color);
         request.push([bg_color.r, bg_color.g, bg_color.b, 1.0]);
         request.push([
             self.offset.x,
             self.offset.y,
             0.0,
             0.0,
         ]);
         request.extend_from_slice(&self.glyph_gpu_blocks);
@@ -1066,17 +1119,17 @@ impl PrimitiveContainer {
     // TODO(gw): Currently, primitives other than those
     //           listed here are handled before the
     //           add_primitive() call. In the future
     //           we should move the logic for all other
     //           primitive types to use this.
     pub fn is_visible(&self) -> bool {
         match *self {
             PrimitiveContainer::TextRun(ref info) => {
-                info.font.color.a > 0
+                info.specified_font.color.a > 0
             }
             PrimitiveContainer::Brush(ref brush) => {
                 match brush.kind {
                     BrushKind::Solid { ref color, .. } => {
                         color.a > 0.0
                     }
                     BrushKind::Clear |
                     BrushKind::Picture { .. } |
@@ -1095,17 +1148,17 @@ impl PrimitiveContainer {
     // Create a clone of this PrimitiveContainer, applying whatever
     // changes are necessary to the primitive to support rendering
     // it as part of the supplied shadow.
     pub fn create_shadow(&self, shadow: &Shadow) -> PrimitiveContainer {
         match *self {
             PrimitiveContainer::TextRun(ref info) => {
                 let mut font = FontInstance {
                     color: shadow.color.into(),
-                    ..info.font.clone()
+                    ..info.specified_font.clone()
                 };
                 if shadow.blur_radius > 0.0 {
                     font.disable_subpixel_aa();
                 }
 
                 PrimitiveContainer::TextRun(TextRunPrimitiveCpu::new(
                     font,
                     info.offset + shadow.offset,
@@ -1141,38 +1194,45 @@ impl PrimitiveContainer {
 pub struct PrimitiveStore {
     /// CPU side information only.
     pub cpu_brushes: Vec<BrushPrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
 
     pub pictures: Vec<PicturePrimitive>,
     next_picture_id: u64,
+
+    /// A primitive index to chase through debugging.
+    pub chase_id: Option<PrimitiveIndex>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_brushes: Vec::new(),
             cpu_text_runs: Vec::new(),
 
             pictures: Vec::new(),
             next_picture_id: 0,
+
+            chase_id: None,
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_brushes: recycle_vec(self.cpu_brushes),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
 
             pictures: recycle_vec(self.pictures),
             next_picture_id: self.next_picture_id,
+
+            chase_id: self.chase_id,
         }
     }
 
     pub fn add_image_picture(
         &mut self,
         composite_mode: Option<PictureCompositeMode>,
         is_in_3d_context: bool,
         pipeline_id: PipelineId,
@@ -1208,17 +1268,17 @@ impl PrimitiveStore {
         let prim_index = self.cpu_metadata.len();
 
         let base_metadata = PrimitiveMetadata {
             clip_sources,
             gpu_location: GpuCacheHandle::new(),
             clip_task_id: None,
             local_rect: *local_rect,
             local_clip_rect: *local_clip_rect,
-            clip_chain_rect_index: ClipChainRectIndex(0),
+            combined_local_clip_rect: *local_clip_rect,
             is_backface_visible,
             screen_rect: None,
             tag,
             opacity: PrimitiveOpacity::translucent(),
             prim_kind: PrimitiveKind::Brush,
             cpu_prim_index: SpecificPrimitiveIndex(0),
             #[cfg(debug_assertions)]
             prepared_frame_id: FrameId(0),
@@ -1490,17 +1550,17 @@ impl PrimitiveStore {
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 // The transform only makes sense for screen space rasterization
                 let transform = prim_run_context.scroll_node.world_content_transform.into();
                 text.prepare_for_render(
                     frame_context.device_pixel_scale,
-                    transform,
+                    &transform,
                     pic_context.allow_subpixel_aa,
                     pic_context.display_list,
                     frame_state,
                 );
             }
             PrimitiveKind::Brush => {
                 let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
 
@@ -1639,17 +1699,19 @@ impl PrimitiveStore {
 
                             if let Some(tile_size) = image_properties.tiling {
 
                                 let device_image_size = image_properties.descriptor.size;
 
                                 // Tighten the clip rect because decomposing the repeated image can
                                 // produce primitives that are partially covering the original image
                                 // rect and we want to clip these extra parts out.
-                                let tight_clip_rect = metadata.local_clip_rect.intersection(&metadata.local_rect).unwrap();
+                                let tight_clip_rect = metadata
+                                    .combined_local_clip_rect
+                                    .intersection(&metadata.local_rect).unwrap();
 
                                 let visible_rect = compute_conservative_visible_rect(
                                     prim_run_context,
                                     frame_context,
                                     &tight_clip_rect
                                 );
 
                                 let base_edge_flags = edge_flags_for_tile_spacing(tile_spacing);
@@ -1679,28 +1741,28 @@ impl PrimitiveStore {
 
                                                 frame_state.resource_cache.request_image(
                                                     request.with_tile(tile_offset),
                                                     frame_state.gpu_cache,
                                                 );
 
                                                 let mut handle = GpuCacheHandle::new();
                                                 if let Some(mut request) = frame_state.gpu_cache.request(&mut handle) {
-                                                    request.push(*tile_rect);
-                                                    request.push(tight_clip_rect);
                                                     request.push(ColorF::new(1.0, 1.0, 1.0, opacity_binding.current).premultiplied());
                                                     request.push(PremultipliedColorF::WHITE);
                                                     request.push([tile_rect.size.width, tile_rect.size.height, 0.0, 0.0]);
                                                     request.write_segment(*tile_rect, [0.0; 4]);
                                                 }
 
                                                 visible_tiles.push(VisibleImageTile {
                                                     tile_offset,
                                                     handle,
                                                     edge_flags: tile_flags & edge_flags,
+                                                    local_rect: *tile_rect,
+                                                    local_clip_rect: tight_clip_rect,
                                                 });
                                             }
                                         );
                                     }
                                 );
 
                                 if visible_tiles.is_empty() {
                                     // At this point if we don't have tiles to show it means we could probably
@@ -1784,19 +1846,17 @@ impl PrimitiveStore {
                             decompose_repeated_primitive(
                                 visible_tiles,
                                 metadata,
                                 &stretch_size,
                                 &tile_spacing,
                                 prim_run_context,
                                 frame_context,
                                 frame_state,
-                                &mut |rect, clip_rect, mut request| {
-                                    request.push(*rect);
-                                    request.push(*clip_rect);
+                                &mut |rect, mut request| {
                                     request.push([
                                         center.x,
                                         center.y,
                                         start_radius,
                                         end_radius,
                                     ]);
                                     request.push([
                                         ratio_xy,
@@ -1836,19 +1896,17 @@ impl PrimitiveStore {
                             decompose_repeated_primitive(
                                 visible_tiles,
                                 metadata,
                                 &stretch_size,
                                 &tile_spacing,
                                 prim_run_context,
                                 frame_context,
                                 frame_state,
-                                &mut |rect, clip_rect, mut request| {
-                                    request.push(*rect);
-                                    request.push(*clip_rect);
+                                &mut |rect, mut request| {
                                     request.push([
                                         start_point.x,
                                         start_point.y,
                                         end_point.x,
                                         end_point.y,
                                     ]);
                                     request.push([
                                         pack_as_float(extend_mode as u32),
@@ -1890,20 +1948,16 @@ impl PrimitiveStore {
 
         if is_tiled {
             // we already requested each tile's gpu data.
             return;
         }
 
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = frame_state.gpu_cache.request(&mut metadata.gpu_location) {
-            // has to match VECS_PER_BRUSH_PRIM
-            request.push(metadata.local_rect);
-            request.push(metadata.local_clip_rect);
-
             match metadata.prim_kind {
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Brush => {
                     let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
                     brush.write_gpu_blocks(&mut request, metadata.local_rect);
@@ -2037,21 +2091,21 @@ impl PrimitiveStore {
                         continue;
                     }
                 };
 
                 // If the scroll node transforms are different between the clip
                 // node and the primitive, we need to get the clip rect in the
                 // local space of the primitive, in order to generate correct
                 // local segments.
-                let local_clip_rect = if clip_item.scroll_node_data_index == prim_run_context.scroll_node.node_data_index {
+                let local_clip_rect = if clip_item.transform_index == prim_run_context.scroll_node.transform_index {
                     local_clip_rect
                 } else {
                     let clip_transform = frame_context
-                        .node_data[clip_item.scroll_node_data_index.0 as usize]
+                        .transforms[clip_item.transform_index.0 as usize]
                         .transform;
                     let prim_transform = &prim_run_context.scroll_node.world_content_transform;
                     let relative_transform = prim_transform
                         .inverse()
                         .unwrap_or(WorldToLayoutFastTransform::identity())
                         .pre_mul(&clip_transform.into());
 
                     relative_transform.transform_rect(&local_clip_rect)
@@ -2188,30 +2242,40 @@ impl PrimitiveStore {
         &mut self,
         prim_index: PrimitiveIndex,
         prim_run_context: &PrimitiveRunContext,
         prim_screen_rect: &DeviceIntRect,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
     ) -> bool {
+        if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+            println!("\tupdating clip task with screen rect {:?}", prim_screen_rect);
+        }
         // Reset clips from previous frames since we may clip differently each frame.
         self.reset_clip_task(prim_index);
 
         let prim_screen_rect = match prim_screen_rect.intersection(&frame_context.screen_rect) {
             Some(rect) => rect,
             None => {
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tculled by the intersection with frame rect {:?}",
+                        frame_context.screen_rect);
+                }
                 self.cpu_metadata[prim_index.0].screen_rect = None;
                 return false;
             }
         };
 
         let mut combined_outer_rect =
             prim_screen_rect.intersection(&prim_run_context.clip_chain.combined_outer_screen_rect);
         let clip_chain = prim_run_context.clip_chain.nodes.clone();
+        if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+            println!("\tbase combined outer rect {:?}", combined_outer_rect);
+        }
 
         let prim_coordinate_system_id = prim_run_context.scroll_node.coordinate_system_id;
         let transform = &prim_run_context.scroll_node.world_content_transform;
         let extra_clip =  {
             let metadata = &self.cpu_metadata[prim_index.0];
             metadata.clip_sources.as_ref().map(|clip_sources| {
                 let prim_clips = frame_state.clip_store.get_mut(clip_sources);
                 prim_clips.update(
@@ -2223,20 +2287,23 @@ impl PrimitiveStore {
                     transform,
                     frame_context.device_pixel_scale,
                     Some(&prim_screen_rect),
                 );
 
                 if let Some(outer) = screen_outer_rect {
                     combined_outer_rect = combined_outer_rect.and_then(|r| r.intersection(&outer));
                 }
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tfound extra clip with screen bounds {:?}", screen_outer_rect);
+                }
 
                 Arc::new(ClipChainNode {
                     work_item: ClipWorkItem {
-                        scroll_node_data_index: prim_run_context.scroll_node.node_data_index,
+                        transform_index: prim_run_context.scroll_node.transform_index,
                         clip_sources: clip_sources.weak(),
                         coordinate_system_id: prim_coordinate_system_id,
                     },
                     // The local_clip_rect a property of ClipChain nodes that are ClipScrollNodes.
                     // It's used to calculate a local clipping rectangle before we reach this
                     // point, so we can set it to zero here. It should be unused from this point
                     // on.
                     local_clip_rect: LayoutRect::zero(),
@@ -2246,16 +2313,19 @@ impl PrimitiveStore {
                 })
             })
         };
 
         // If everything is clipped out, then we don't need to render this primitive.
         let combined_outer_rect = match combined_outer_rect {
             Some(rect) if !rect.is_empty() => rect,
             _ => {
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tculled by the empty combined screen rect");
+                }
                 self.cpu_metadata[prim_index.0].screen_rect = None;
                 return false;
             }
         };
 
         let mut has_clips_from_other_coordinate_systems = false;
         let mut combined_inner_rect = frame_context.screen_rect;
         let clips = convert_clip_chain_to_clip_vector(
@@ -2269,58 +2339,74 @@ impl PrimitiveStore {
 
         // This can happen if we had no clips or if all the clips were optimized away. In
         // some cases we still need to create a clip mask in order to create a rectangular
         // clip in screen space coordinates.
         if clips.is_empty() {
             // If we don't have any clips from other coordinate systems, the local clip
             // calculated from the clip chain should be sufficient to ensure proper clipping.
             if !has_clips_from_other_coordinate_systems {
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tneed no task: all clips are within the coordinate system");
+                }
                 return true;
             }
 
             // If we have filtered all clips and the screen rect isn't any smaller, we can just
             // skip masking entirely.
             if combined_outer_rect == prim_screen_rect {
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tneed no task: combined rect is not smaller");
+                }
                 return true;
             }
             // Otherwise we create an empty mask, but with an empty inner rect to avoid further
             // optimization of the empty mask.
             combined_inner_rect = DeviceIntRect::zero();
         }
 
         if combined_inner_rect.contains_rect(&prim_screen_rect) {
-           return true;
+            if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                println!("\tneed no task: contained within the clip inner rect");
+            }
+            return true;
         }
 
         // First try to  render this primitive's mask using optimized brush rendering.
         if self.update_clip_task_for_brush(
             prim_run_context,
             prim_index,
             &clips,
             &combined_outer_rect,
             has_clips_from_other_coordinate_systems,
             pic_state,
             frame_context,
             frame_state,
         ) {
+            if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                println!("\tsegment tasks have been created for clipping");
+            }
             return true;
         }
 
         let clip_task = RenderTask::new_mask(
             combined_outer_rect,
             clips,
             prim_coordinate_system_id,
             frame_state.clip_store,
             frame_state.gpu_cache,
             frame_state.resource_cache,
             frame_state.render_tasks,
         );
 
         let clip_task_id = frame_state.render_tasks.add(clip_task);
+        if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+            println!("\tcreated task {:?} with combined outer rect {:?}",
+                clip_task_id, combined_outer_rect);
+        }
         self.cpu_metadata[prim_index.0].clip_task_id = Some(clip_task_id);
         pic_state.tasks.push(clip_task_id);
 
         true
     }
 
     pub fn prepare_prim_for_render(
         &mut self,
@@ -2336,16 +2422,19 @@ impl PrimitiveStore {
 
         // Do some basic checks first, that can early out
         // without even knowing the local rect.
         let (prim_kind, cpu_prim_index) = {
             let metadata = &self.cpu_metadata[prim_index.0];
 
             if !metadata.is_backface_visible &&
                prim_run_context.scroll_node.world_content_transform.is_backface_visible() {
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tculled for not having visible back faces");
+                }
                 return None;
             }
 
             (metadata.prim_kind, metadata.cpu_prim_index)
         };
 
         // If we have dependencies, we need to prepare them first, in order
         // to know the actual rect of this primitive.
@@ -2353,16 +2442,19 @@ impl PrimitiveStore {
         // local space, which may force us to render this item on a larger
         // picture target, if being composited.
         if let PrimitiveKind::Brush = prim_kind {
             if let BrushKind::Picture { pic_index, .. } = self.cpu_brushes[cpu_prim_index.0].kind {
                 let pic_context_for_children = {
                     let pic = &mut self.pictures[pic_index.0];
 
                     if !pic.resolve_scene_properties(frame_context.scene_properties) {
+                        if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                            println!("\tculled for carrying an invisible composite filter");
+                        }
                         return None;
                     }
 
                     may_need_clip_mask = pic.composite_mode.is_some();
 
                     let inflation_factor = match pic.composite_mode {
                         Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
                             // The amount of extra space needed for primitives inside
@@ -2424,45 +2516,70 @@ impl PrimitiveStore {
                 }
             }
         }
 
         let (local_rect, unclipped_device_rect) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             if metadata.local_rect.size.width <= 0.0 ||
                metadata.local_rect.size.height <= 0.0 {
-                //warn!("invalid primitive rect {:?}", metadata.local_rect);
+                if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                    println!("\tculled for zero local rectangle");
+                }
                 return None;
             }
 
             metadata.screen_rect = None;
 
             // Inflate the local rect for this primitive by the inflation factor of
             // the picture context. This ensures that even if the primitive itself
             // is not visible, any effects from the blur radius will be correctly
             // taken into account.
             let local_rect = metadata.local_rect
                 .inflate(pic_context.inflation_factor, pic_context.inflation_factor)
-                .intersection(&metadata.local_clip_rect)?;
+                .intersection(&metadata.local_clip_rect);
+            let local_rect = match local_rect {
+                Some(local_rect) => local_rect,
+                None => {
+                    if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                        println!("\tculled for being out of the local clip rectangle: {:?}",
+                            metadata.local_clip_rect);
+                    }
+                    return None
+                }
+            };
 
-            let unclipped = calculate_screen_bounding_rect(
+            let unclipped = match calculate_screen_bounding_rect(
                 &prim_run_context.scroll_node.world_content_transform,
                 &local_rect,
                 frame_context.device_pixel_scale,
                 None, //TODO: inflate `frame_context.screen_rect` appropriately
-            )?;
+            ) {
+                Some(rect) => rect,
+                None => {
+                    if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+                        println!("\tculled for being behind the near plane of transform: {:?}",
+                            prim_run_context.scroll_node.world_content_transform);
+                    }
+                    return None
+                }
+            };
 
             let clipped = unclipped
                 .intersection(&prim_run_context.clip_chain.combined_outer_screen_rect)?;
 
             metadata.screen_rect = Some(ScreenRect {
                 clipped,
                 unclipped,
             });
-            metadata.clip_chain_rect_index = prim_run_context.clip_chain_rect_index;
+
+            metadata.combined_local_clip_rect = prim_run_context
+                .local_clip_rect
+                .intersection(&metadata.local_clip_rect)
+                .unwrap_or(LayoutRect::zero());
 
             (local_rect, unclipped)
         };
 
         self.build_prim_segments_if_needed(
             prim_index,
             pic_state,
             frame_state,
@@ -2475,16 +2592,20 @@ impl PrimitiveStore {
             &unclipped_device_rect,
             pic_state,
             frame_context,
             frame_state,
         ) {
             return None;
         }
 
+        if cfg!(debug_assertions) && Some(prim_index) == self.chase_id {
+            println!("\tconsidered visible and ready with local rect {:?}", local_rect);
+        }
+
         self.prepare_prim_for_render_inner(
             prim_index,
             prim_run_context,
             pic_state_for_children,
             pic_context,
             pic_state,
             frame_context,
             frame_state,
@@ -2509,16 +2630,20 @@ impl PrimitiveStore {
         frame_state: &mut FrameBuildingState,
     ) -> PrimitiveRunLocalRect {
         let mut result = PrimitiveRunLocalRect {
             local_rect_in_actual_parent_space: LayoutRect::zero(),
             local_rect_in_original_parent_space: LayoutRect::zero(),
         };
 
         for run in &pic_context.prim_runs {
+            if run.is_chasing(self.chase_id) {
+                println!("\tpreparing a run of length {} in pipeline {:?}",
+                    run.count, pic_context.pipeline_id);
+            }
             // TODO(gw): Perhaps we can restructure this to not need to create
             //           a new primitive context for every run (if the hash
             //           lookups ever show up in a profile).
             let scroll_node = &frame_context
                 .clip_scroll_tree
                 .nodes[run.clip_and_scroll.scroll_node_id.0];
             let clip_chain = frame_context
                 .clip_scroll_tree
@@ -2526,22 +2651,26 @@ impl PrimitiveStore {
 
             // Mark whether this picture contains any complex coordinate
             // systems, due to either the scroll node or the clip-chain.
             pic_state.has_non_root_coord_system |=
                 scroll_node.coordinate_system_id != CoordinateSystemId::root();
             pic_state.has_non_root_coord_system |= clip_chain.has_non_root_coord_system;
 
             if !scroll_node.invertible {
-                debug!("{:?} {:?}: position not invertible", run.base_prim_index, pic_context.pipeline_id);
+                if run.is_chasing(self.chase_id) {
+                    println!("\tculled for the scroll node transform being non-invertible");
+                }
                 continue;
             }
 
             if clip_chain.combined_outer_screen_rect.is_empty() {
-                debug!("{:?} {:?}: clipped out", run.base_prim_index, pic_context.pipeline_id);
+                if run.is_chasing(self.chase_id) {
+                    println!("\tculled for out of screen bounds");
+                }
                 continue;
             }
 
             let parent_relative_transform = pic_context
                 .inv_world_transform
                 .map(|inv_parent| {
                     inv_parent.pre_mul(&scroll_node.world_content_transform)
                 });
@@ -2560,29 +2689,26 @@ impl PrimitiveStore {
                 });
 
             let clip_chain_rect = if pic_context.apply_local_clip_rect {
                 get_local_clip_rect_for_nodes(scroll_node, clip_chain)
             } else {
                 None
             };
 
-            let clip_chain_rect_index = match clip_chain_rect {
+            let local_clip_chain_rect = match clip_chain_rect {
                 Some(rect) if rect.is_empty() => continue,
-                Some(rect) => {
-                    frame_state.local_clip_rects.push(rect);
-                    ClipChainRectIndex(frame_state.local_clip_rects.len() - 1)
-                }
-                None => ClipChainRectIndex(0), // This is no clipping.
+                Some(rect) => rect,
+                None => frame_context.max_local_clip,
             };
 
             let child_prim_run_context = PrimitiveRunContext::new(
                 clip_chain,
                 scroll_node,
-                clip_chain_rect_index,
+                local_clip_chain_rect,
             );
 
             for i in 0 .. run.count {
                 let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                 if let Some(prim_local_rect) = self.prepare_prim_for_render(
                     prim_index,
                     &child_prim_run_context,
@@ -2606,16 +2732,22 @@ impl PrimitiveStore {
                         result.local_rect_in_actual_parent_space =
                             result.local_rect_in_actual_parent_space.union(&bounds);
                     }
                     if let Some(ref matrix) = original_relative_transform {
                         let bounds = matrix.transform_rect(&clipped_rect);
                         result.local_rect_in_original_parent_space =
                             result.local_rect_in_original_parent_space.union(&bounds);
                     }
+
+                    if let Some(ref matrix) = parent_relative_transform {
+                        let bounds = matrix.transform_rect(&prim_local_rect);
+                        result.local_rect_in_actual_parent_space =
+                            result.local_rect_in_actual_parent_space.union(&bounds);
+                    }
                 }
             }
         }
 
         result
     }
 }
 
@@ -2641,49 +2773,54 @@ fn build_gradient_stops_request(
 fn decompose_repeated_primitive(
     visible_tiles: &mut Vec<VisibleGradientTile>,
     metadata: &mut PrimitiveMetadata,
     stretch_size: &LayoutSize,
     tile_spacing: &LayoutSize,
     prim_run_context: &PrimitiveRunContext,
     frame_context: &FrameBuildingContext,
     frame_state: &mut FrameBuildingState,
-    callback: &mut FnMut(&LayoutRect, &LayoutRect, GpuDataRequest),
+    callback: &mut FnMut(&LayoutRect, GpuDataRequest),
 ) {
     visible_tiles.clear();
 
     // Tighten the clip rect because decomposing the repeated image can
     // produce primitives that are partially covering the original image
     // rect and we want to clip these extra parts out.
-    let tight_clip_rect = metadata.local_clip_rect.intersection(&metadata.local_rect).unwrap();
+    let tight_clip_rect = metadata
+        .combined_local_clip_rect
+        .intersection(&metadata.local_rect).unwrap();
 
     let visible_rect = compute_conservative_visible_rect(
         prim_run_context,
         frame_context,
         &tight_clip_rect
     );
     let stride = *stretch_size + *tile_spacing;
 
     for_each_repetition(
         &metadata.local_rect,
         &visible_rect,
         &stride,
         &mut |origin, _| {
 
             let mut handle = GpuCacheHandle::new();
+            let rect = LayoutRect {
+                origin: *origin,
+                size: *stretch_size,
+            };
             if let Some(request) = frame_state.gpu_cache.request(&mut handle) {
-                let rect = LayoutRect {
-                    origin: *origin,
-                    size: *stretch_size,
-                };
-
-                callback(&rect, &tight_clip_rect, request);
+                callback(&rect, request);
             }
 
-            visible_tiles.push(VisibleGradientTile { handle });
+            visible_tiles.push(VisibleGradientTile {
+                local_rect: rect,
+                local_clip_rect: tight_clip_rect,
+                handle
+            });
         }
     );
 
     if visible_tiles.is_empty() {
         // At this point if we don't have tiles to show it means we could probably
         // have done a better a job at culling during an earlier stage.
         // Clearing the screen rect has the effect of "culling out" the primitive
         // from the point of view of the batch builder, and ensures we don't hit
@@ -2720,41 +2857,16 @@ fn edge_flags_for_tile_spacing(tile_spac
     }
     if tile_spacing.height > 0.0 {
         flags |= EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM;
     }
 
     flags
 }
 
-//Test for one clip region contains another
-trait InsideTest<T> {
-    fn might_contain(&self, clip: &T) -> bool;
-}
-
-impl InsideTest<ComplexClipRegion> for ComplexClipRegion {
-    // Returns true if clip is inside self, can return false negative
-    fn might_contain(&self, clip: &ComplexClipRegion) -> bool {
-        let delta_left = clip.rect.origin.x - self.rect.origin.x;
-        let delta_top = clip.rect.origin.y - self.rect.origin.y;
-        let delta_right = self.rect.max_x() - clip.rect.max_x();
-        let delta_bottom = self.rect.max_y() - clip.rect.max_y();
-
-        delta_left >= 0f32 && delta_top >= 0f32 && delta_right >= 0f32 && delta_bottom >= 0f32 &&
-            clip.radii.top_left.width >= self.radii.top_left.width - delta_left &&
-            clip.radii.top_left.height >= self.radii.top_left.height - delta_top &&
-            clip.radii.top_right.width >= self.radii.top_right.width - delta_right &&
-            clip.radii.top_right.height >= self.radii.top_right.height - delta_top &&
-            clip.radii.bottom_left.width >= self.radii.bottom_left.width - delta_left &&
-            clip.radii.bottom_left.height >= self.radii.bottom_left.height - delta_bottom &&
-            clip.radii.bottom_right.width >= self.radii.bottom_right.width - delta_right &&
-            clip.radii.bottom_right.height >= self.radii.bottom_right.height - delta_bottom
-    }
-}
-
 fn convert_clip_chain_to_clip_vector(
     clip_chain_nodes: ClipChainNodeRef,
     extra_clip: ClipChainNodeRef,
     combined_outer_rect: &DeviceIntRect,
     combined_inner_rect: &mut DeviceIntRect,
     prim_coordinate_system: CoordinateSystemId,
     has_clips_from_other_coordinate_systems: &mut bool,
 ) -> Vec<ClipWorkItem> {
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1058,38 +1058,42 @@ impl RenderBackend {
             // scroll at the same time. we should keep track of the fact that we skipped
             // composition here and do it as soon as we receive the scene.
             op.render = false;
             op.composite = false;
         }
 
         debug_assert!(op.render || !op.composite);
 
+        let mut render_time = None;
         if op.render && doc.has_pixels() {
             profile_scope!("generate frame");
 
             *frame_counter += 1;
 
             // borrow ck hack for profile_counters
             let (pending_update, rendered_document) = {
                 let _timer = profile_counters.total_time.timer();
+                let render_start_time = precise_time_ns();
 
                 let rendered_document = doc.render(
                     &mut self.resource_cache,
                     &mut self.gpu_cache,
                     &mut profile_counters.resources,
                     op.build || has_built_scene,
                 );
 
                 debug!("generated frame for document {:?} with {} passes",
                     document_id, rendered_document.frame.passes.len());
 
                 let msg = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
                 self.result_tx.send(msg).unwrap();
 
+                render_time = Some(precise_time_ns() - render_start_time);
+
                 let pending_update = self.resource_cache.pending_updates();
                 (pending_update, rendered_document)
             };
 
             let msg = ResultMsg::PublishPipelineInfo(doc.updated_pipeline_info());
             self.result_tx.send(msg).unwrap();
 
             // Publish the frame
@@ -1106,17 +1110,17 @@ impl RenderBackend {
             // there's no pixels. We still want to pretend to render and request
             // a composite to make sure that the callbacks (particularly the
             // new_frame_ready callback below) has the right flags.
             let msg = ResultMsg::PublishPipelineInfo(doc.updated_pipeline_info());
             self.result_tx.send(msg).unwrap();
         }
 
         if transaction_msg.generate_frame {
-            self.notifier.new_frame_ready(document_id, op.scroll, op.composite);
+            self.notifier.new_frame_ready(document_id, op.scroll, op.composite, render_time);
         }
     }
 
     #[cfg(not(feature = "debugger"))]
     fn get_docs_for_debugger(&self) -> String {
         String::new()
     }
 
@@ -1405,14 +1409,14 @@ impl RenderBackend {
                 id,
                 render_doc,
                 self.resource_cache.pending_updates(),
                 profile_counters.clone(),
             );
             self.result_tx.send(msg_publish).unwrap();
             profile_counters.reset();
 
-            self.notifier.new_frame_ready(id, false, true);
+            self.notifier.new_frame_ready(id, false, true, None);
             self.documents.insert(id, doc);
         }
     }
 }
 
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -21,17 +21,17 @@ use batch::{BatchKind, BatchTextures, Br
 use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
 use debug_colors;
 use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture, PBO};
 use device::{ExternalTexture, FBOId, TextureSlot};
 use device::{FileWatcherHandler, ShaderError, TextureFilter,
              VertexUsageHint, VAO, VBO, CustomVAO};
 use device::{ProgramCache, ReadPixelsFormat};
 use euclid::{rect, Transform3D};
-use frame_builder::FrameBuilderConfig;
+use frame_builder::{ChasePrimitive, FrameBuilderConfig};
 use gleam::gl;
 use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 #[cfg(feature = "pathfinder")]
 use gpu_glyph_renderer::GpuGlyphRenderer;
 use internal_types::{SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
 use internal_types::{CacheTextureId, DebugOutput, FastHashMap, RenderedDocument, ResultMsg};
 use internal_types::{TextureUpdateList, TextureUpdateOp, TextureUpdateSource};
@@ -265,24 +265,25 @@ impl From<GlyphFormat> for ShaderColorMo
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub(crate) enum TextureSampler {
     Color0,
     Color1,
     Color2,
     CacheA8,
     CacheRGBA8,
     ResourceCache,
-    ClipScrollNodes,
+    TransformPalette,
     RenderTasks,
     Dither,
     // A special sampler that is bound to the A8 output of
     // the *first* pass. Items rendered in this target are
     // available as inputs to tasks in any subsequent pass.
     SharedCacheA8,
-    LocalClipRects
+    PrimitiveHeadersF,
+    PrimitiveHeadersI,
 }
 
 impl TextureSampler {
     pub(crate) fn color(n: usize) -> TextureSampler {
         match n {
             0 => TextureSampler::Color0,
             1 => TextureSampler::Color1,
             2 => TextureSampler::Color2,
@@ -297,21 +298,22 @@ impl Into<TextureSlot> for TextureSample
     fn into(self) -> TextureSlot {
         match self {
             TextureSampler::Color0 => TextureSlot(0),
             TextureSampler::Color1 => TextureSlot(1),
             TextureSampler::Color2 => TextureSlot(2),
             TextureSampler::CacheA8 => TextureSlot(3),
             TextureSampler::CacheRGBA8 => TextureSlot(4),
             TextureSampler::ResourceCache => TextureSlot(5),
-            TextureSampler::ClipScrollNodes => TextureSlot(6),
+            TextureSampler::TransformPalette => TextureSlot(6),
             TextureSampler::RenderTasks => TextureSlot(7),
             TextureSampler::Dither => TextureSlot(8),
             TextureSampler::SharedCacheA8 => TextureSlot(9),
-            TextureSampler::LocalClipRects => TextureSlot(10),
+            TextureSampler::PrimitiveHeadersF => TextureSlot(10),
+            TextureSampler::PrimitiveHeadersI => TextureSlot(11),
         }
     }
 }
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
@@ -325,22 +327,17 @@ pub(crate) mod desc {
             VertexAttribute {
                 name: "aPosition",
                 count: 2,
                 kind: VertexAttributeKind::F32,
             },
         ],
         instance_attributes: &[
             VertexAttribute {
-                name: "aData0",
-                count: 4,
-                kind: VertexAttributeKind::I32,
-            },
-            VertexAttribute {
-                name: "aData1",
+                name: "aData",
                 count: 4,
                 kind: VertexAttributeKind::I32,
             },
         ],
     };
 
     pub const BLUR: VertexDescriptor = VertexDescriptor {
         vertex_attributes: &[
@@ -1210,18 +1207,24 @@ impl CacheTexture {
 }
 
 struct VertexDataTexture {
     texture: Texture,
     pbo: PBO,
 }
 
 impl VertexDataTexture {
-    fn new(device: &mut Device) -> VertexDataTexture {
-        let texture = device.create_texture(TextureTarget::Default, ImageFormat::RGBAF32);
+    fn new(
+        device: &mut Device,
+        format: ImageFormat,
+    ) -> VertexDataTexture {
+        let texture = device.create_texture(
+            TextureTarget::Default,
+            format,
+        );
         let pbo = device.create_pbo();
 
         VertexDataTexture { texture, pbo }
     }
 
     fn update<T>(&mut self, device: &mut Device, data: &mut Vec<T>) {
         if data.is_empty() {
             return;
@@ -1364,18 +1367,19 @@ pub struct Renderer {
     #[cfg(feature = "debug_renderer")]
     new_scene_indicator: ChangeIndicator,
 
     last_time: u64,
 
     pub gpu_profile: GpuProfiler<GpuProfileTag>,
     vaos: RendererVAOs,
 
-    node_data_texture: VertexDataTexture,
-    local_clip_rects_texture: VertexDataTexture,
+    prim_header_f_texture: VertexDataTexture,
+    prim_header_i_texture: VertexDataTexture,
+    transforms_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     gpu_cache_texture: CacheTexture,
 
     gpu_cache_frame_id: FrameId,
     gpu_cache_overflow: bool,
 
     pipeline_info: PipelineInfo,
 
@@ -1624,19 +1628,20 @@ impl Renderer {
         let border_vao =
             device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
         let dash_and_dot_vao =
             device.create_vao_with_new_instances(&desc::BORDER_CORNER_DASH_AND_DOT, &prim_vao);
         let texture_cache_upload_pbo = device.create_pbo();
 
         let texture_resolver = SourceTextureResolver::new(&mut device);
 
-        let node_data_texture = VertexDataTexture::new(&mut device);
-        let local_clip_rects_texture = VertexDataTexture::new(&mut device);
-        let render_task_texture = VertexDataTexture::new(&mut device);
+        let prim_header_f_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
+        let prim_header_i_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAI32);
+        let transforms_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
+        let render_task_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
 
         let gpu_cache_texture = CacheTexture::new(
             &mut device,
             options.scatter_gpu_cache_updates,
         )?;
 
         device.end_frame();
 
@@ -1648,16 +1653,17 @@ impl Renderer {
             (false, _) => FontRenderMode::Mono,
         };
 
         let config = FrameBuilderConfig {
             enable_scrollbars: options.enable_scrollbars,
             default_font_render_mode,
             dual_source_blending_is_enabled: true,
             dual_source_blending_is_supported: ext_dual_source_blending,
+            chase_primitive: options.chase_primitive,
         };
 
         let device_pixel_ratio = options.device_pixel_ratio;
         // First set the flags to default and later call set_debug_flags to ensure any
         // potential transition when enabling a flag is run.
         let debug_flags = DebugFlags::default();
         let payload_rx_for_backend = payload_rx.to_mpsc_receiver();
         let recorder = options.recorder;
@@ -1779,18 +1785,19 @@ impl Renderer {
             gpu_glyph_renderer,
             vaos: RendererVAOs {
                 prim_vao,
                 blur_vao,
                 clip_vao,
                 dash_and_dot_vao,
                 border_vao,
             },
-            node_data_texture,
-            local_clip_rects_texture,
+            transforms_texture,
+            prim_header_i_texture,
+            prim_header_f_texture,
             render_task_texture,
             pipeline_info: PipelineInfo::default(),
             dither_matrix_texture,
             external_image_handler: None,
             output_image_handler: None,
             output_targets: FastHashMap::default(),
             cpu_profiles: VecDeque::new(),
             gpu_profiles: VecDeque::new(),
@@ -3528,26 +3535,41 @@ impl Renderer {
             is_shared: list.is_shared,
         })
     }
 
     fn bind_frame_data(&mut self, frame: &mut Frame) {
         let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
         self.device.set_device_pixel_ratio(frame.device_pixel_ratio);
 
-        self.node_data_texture.update(&mut self.device, &mut frame.node_data);
-        self.device.bind_texture(TextureSampler::ClipScrollNodes, &self.node_data_texture.texture);
-
-        self.local_clip_rects_texture.update(
+        self.prim_header_f_texture.update(
             &mut self.device,
-            &mut frame.clip_chain_local_clip_rects
+            &mut frame.prim_headers.headers_float,
         );
         self.device.bind_texture(
-            TextureSampler::LocalClipRects,
-            &self.local_clip_rects_texture.texture
+            TextureSampler::PrimitiveHeadersF,
+            &self.prim_header_f_texture.texture,
+        );
+
+        self.prim_header_i_texture.update(
+            &mut self.device,
+            &mut frame.prim_headers.headers_int,
+        );
+        self.device.bind_texture(
+            TextureSampler::PrimitiveHeadersI,
+            &self.prim_header_i_texture.texture,
+        );
+
+        self.transforms_texture.update(
+            &mut self.device,
+            &mut frame.transform_palette,
+        );
+        self.device.bind_texture(
+            TextureSampler::TransformPalette,
+            &self.transforms_texture.texture,
         );
 
         self.render_task_texture
             .update(&mut self.device, &mut frame.render_tasks.task_data);
         self.device.bind_texture(
             TextureSampler::RenderTasks,
             &self.render_task_texture.texture,
         );
@@ -3924,18 +3946,19 @@ impl Renderer {
     // De-initialize the Renderer safely, assuming the GL is still alive and active.
     pub fn deinit(mut self) {
         //Note: this is a fake frame, only needed because texture deletion is require to happen inside a frame
         self.device.begin_frame();
         self.gpu_cache_texture.deinit(&mut self.device);
         if let Some(dither_matrix_texture) = self.dither_matrix_texture {
             self.device.delete_texture(dither_matrix_texture);
         }
-        self.node_data_texture.deinit(&mut self.device);
-        self.local_clip_rects_texture.deinit(&mut self.device);
+        self.transforms_texture.deinit(&mut self.device);
+        self.prim_header_f_texture.deinit(&mut self.device);
+        self.prim_header_i_texture.deinit(&mut self.device);
         self.render_task_texture.deinit(&mut self.device);
         self.device.delete_pbo(self.texture_cache_upload_pbo);
         self.texture_resolver.deinit(&mut self.device);
         self.device.delete_vao(self.vaos.prim_vao);
         self.device.delete_vao(self.vaos.clip_vao);
         self.device.delete_vao(self.vaos.blur_vao);
         self.device.delete_vao(self.vaos.dash_and_dot_vao);
         self.device.delete_vao(self.vaos.border_vao);
@@ -4012,21 +4035,21 @@ pub trait ThreadListener {
 
 /// Allows callers to hook in at certain points of the async scene build. These
 /// functions are all called from the scene builder thread.
 pub trait SceneBuilderHooks {
     /// This is called exactly once, when the scene builder thread is started
     /// and before it processes anything.
     fn register(&self);
     /// This is called before each scene swap occurs.
-    fn pre_scene_swap(&self);
+    fn pre_scene_swap(&self, scenebuild_time: u64);
     /// This is called after each scene swap occurs. The PipelineInfo contains
     /// the updated epochs and pipelines removed in the new scene compared to
     /// the old scene.
-    fn post_scene_swap(&self, info: PipelineInfo);
+    fn post_scene_swap(&self, info: PipelineInfo, sceneswap_time: u64);
     /// This is called after a resource update operation on the scene builder
     /// thread, in the case where resource updates were applied without a scene
     /// build.
     fn post_resource_update(&self);
     /// This is a generic callback which provides an opportunity to run code
     /// on the scene builder thread. This is called as part of the main message
     /// loop of the scene builder thread, but outside of any specific message
     /// handler.
@@ -4073,16 +4096,17 @@ pub struct RendererOptions {
     pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
     pub cached_programs: Option<Rc<ProgramCache>>,
     pub debug_flags: DebugFlags,
     pub renderer_id: Option<u64>,
     pub disable_dual_source_blending: bool,
     pub scene_builder_hooks: Option<Box<SceneBuilderHooks + Send>>,
     pub sampler: Option<Box<AsyncPropertySampler + Send>>,
+    pub chase_primitive: ChasePrimitive,
 }
 
 impl Default for RendererOptions {
     fn default() -> Self {
         RendererOptions {
             device_pixel_ratio: 1.0,
             resource_override_path: None,
             enable_aa: true,
@@ -4106,16 +4130,17 @@ impl Default for RendererOptions {
             recorder: None,
             thread_listener: None,
             enable_render_on_scroll: true,
             renderer_id: None,
             cached_programs: None,
             disable_dual_source_blending: false,
             scene_builder_hooks: None,
             sampler: None,
+            chase_primitive: ChasePrimitive::Nothing,
         }
     }
 }
 
 #[cfg(not(feature = "debugger"))]
 pub struct DebugServer;
 
 #[cfg(not(feature = "debugger"))]
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -636,23 +636,30 @@ impl ResourceCache {
             // the texture cache entry has been evicted, treat it as all dirty
             Some(template.descriptor.full_rect())
         } else if needs_update {
             template.dirty_rect
         } else {
             return
         };
 
+        if !self.pending_image_requests.insert(request) {
+            return
+        }
+
+        // If we are tiling, then we need to confirm the dirty rect intersects
+        // the tile before leaving the request in the pending queue.
+        //
         // We can start a worker thread rasterizing right now, if:
         //  - The image is a blob.
         //  - The blob hasn't already been requested this frame.
-        if self.pending_image_requests.insert(request) && template.data.is_blob() {
-            let (offset, size) = match template.tiling {
-                Some(tile_size) => {
-                    let tile_offset = request.tile.unwrap();
+        if template.data.is_blob() || dirty_rect.is_some() {
+            let (offset, size) = match request.tile {
+                Some(tile_offset) => {
+                    let tile_size = template.tiling.unwrap();
                     let actual_size = compute_tile_size(
                         &template.descriptor,
                         tile_size,
                         tile_offset,
                     );
 
                     if let Some(dirty) = dirty_rect {
                         if intersect_for_tile(dirty, actual_size, tile_size, tile_offset).is_none() {
@@ -666,27 +673,29 @@ impl ResourceCache {
                         tile_offset.x as f32 * tile_size as f32,
                         tile_offset.y as f32 * tile_size as f32,
                     );
                     (offset, actual_size)
                 }
                 None => (DevicePoint::zero(), template.descriptor.size),
             };
 
-            if let Some(ref mut renderer) = self.blob_image_renderer {
-                renderer.request(
-                    &self.resources,
-                    request.into(),
-                    &BlobImageDescriptor {
-                        size,
-                        offset,
-                        format: template.descriptor.format,
-                    },
-                    dirty_rect,
-                );
+            if template.data.is_blob() {
+                if let Some(ref mut renderer) = self.blob_image_renderer {
+                    renderer.request(
+                        &self.resources,
+                        request.into(),
+                        &BlobImageDescriptor {
+                            size,
+                            offset,
+                            format: template.descriptor.format,
+                        },
+                        dirty_rect,
+                    );
+                }
             }
         }
     }
 
     pub fn request_glyphs(
         &mut self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
@@ -918,16 +927,18 @@ impl ResourceCache {
             gpu_cache,
             &mut self.texture_cache,
             render_tasks,
         );
         self.texture_cache.end_frame(texture_cache_profile);
     }
 
     fn update_texture_cache(&mut self, gpu_cache: &mut GpuCache) {
+        let mut keys_to_clear_dirty_rect = FastHashSet::default();
+
         for request in self.pending_image_requests.drain() {
             let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
             debug_assert!(image_template.data.uses_texture_cache());
 
             let image_data = match image_template.data {
                 ImageData::Raw(..) | ImageData::External(..) => {
                     // Safe to clone here since the Raw image data is an
                     // Arc, and the external image data is small.
@@ -958,48 +969,51 @@ impl ResourceCache {
                             panic!("Vector image error {}", msg);
                         }
                     }
                 }
             };
 
             let entry = self.cached_images.get_mut(&request).as_mut().unwrap();
             let mut descriptor = image_template.descriptor.clone();
-            //TODO: erasing the dirty rectangle here is incorrect for tiled images,
-            // since other tile requests may follow that depend on it
-            let mut local_dirty_rect = image_template.dirty_rect.take();
+            let local_dirty_rect;
 
             if let Some(tile) = request.tile {
                 let tile_size = image_template.tiling.unwrap();
                 let clipped_tile_size = compute_tile_size(&descriptor, tile_size, tile);
 
-                if let Some(ref mut rect) = local_dirty_rect {
-                    match intersect_for_tile(*rect, clipped_tile_size, tile_size, tile) {
-                        Some(intersection) => *rect = intersection,
-                        None => {
-                            // if re-uploaded, the dirty rect is ignored anyway
-                            debug_assert!(self.texture_cache.needs_upload(&entry.texture_cache_handle))
-                        }
-                    }
-                }
+                local_dirty_rect = if let Some(ref rect) = image_template.dirty_rect {
+                    keys_to_clear_dirty_rect.insert(request.key.clone());
+
+                    // We should either have a dirty rect, or we are re-uploading where the dirty
+                    // rect is ignored anyway.
+                    let intersection = intersect_for_tile(*rect, clipped_tile_size, tile_size, tile);
+                    debug_assert!(intersection.is_some() ||
+                                  self.texture_cache.needs_upload(&entry.texture_cache_handle));
+                    intersection
+                } else {
+                    None
+                };
 
                 // The tiled image could be stored on the CPU as one large image or be
                 // already broken up into tiles. This affects the way we compute the stride
                 // and offset.
                 let tiled_on_cpu = image_template.data.is_blob();
                 if !tiled_on_cpu {
                     let bpp = descriptor.format.bytes_per_pixel();
                     let stride = descriptor.compute_stride();
                     descriptor.stride = Some(stride);
                     descriptor.offset +=
                         tile.y as u32 * tile_size as u32 * stride +
                         tile.x as u32 * tile_size as u32 * bpp;
                 }
 
                 descriptor.size = clipped_tile_size;
+            } else {
+                local_dirty_rect = image_template.dirty_rect.take();
             }
 
             let filter = match request.rendering {
                 ImageRendering::Pixelated => {
                     TextureFilter::Nearest
                 }
                 ImageRendering::Auto | ImageRendering::CrispEdges => {
                     // If the texture uses linear filtering, enable mipmaps and
@@ -1031,16 +1045,21 @@ impl ResourceCache {
                 Some(image_data),
                 [0.0; 3],
                 local_dirty_rect,
                 gpu_cache,
                 None,
                 UvRectKind::Rect,
             );
         }
+
+        for key in keys_to_clear_dirty_rect.drain() {
+            let image_template = self.resources.image_templates.get_mut(key).unwrap();
+            image_template.dirty_rect.take();
+        }
     }
 
     pub fn end_frame(&mut self) {
         debug_assert_eq!(self.state, State::QueryResources);
         self.state = State::Idle;
     }
 
     pub fn clear(&mut self, what: ClearCache) {
--- a/gfx/webrender/src/scene_builder.rs
+++ b/gfx/webrender/src/scene_builder.rs
@@ -8,16 +8,17 @@ use display_list_flattener::build_scene;
 use frame_builder::{FrameBuilderConfig, FrameBuilder};
 use clip_scroll_tree::ClipScrollTree;
 use internal_types::FastHashSet;
 use resource_cache::FontInstanceMap;
 use render_backend::DocumentView;
 use renderer::{PipelineInfo, SceneBuilderHooks};
 use scene::Scene;
 use std::sync::mpsc::{channel, Receiver, Sender};
+use time::precise_time_ns;
 
 // Message from render backend to scene builder.
 pub enum SceneBuilderRequest {
     Transaction {
         document_id: DocumentId,
         scene: Option<SceneRequest>,
         resource_updates: Vec<ResourceUpdate>,
         frame_ops: Vec<FrameMsg>,
@@ -132,16 +133,17 @@ impl SceneBuilder {
             }
             SceneBuilderRequest::Transaction {
                 document_id,
                 scene,
                 resource_updates,
                 frame_ops,
                 render,
             } => {
+                let scenebuild_start_time = precise_time_ns();
                 let built_scene = scene.map(|request|{
                     build_scene(&self.config, request)
                 });
 
                 // TODO: pre-rasterization.
 
                 // We only need the pipeline info and the result channel if we
                 // have a hook callback *and* if this transaction actually built
@@ -151,39 +153,42 @@ impl SceneBuilder {
                 let (pipeline_info, result_tx, result_rx) = match (&self.hooks, &built_scene) {
                     (&Some(ref hooks), &Some(ref built)) => {
                         let info = PipelineInfo {
                             epochs: built.scene.pipeline_epochs.clone(),
                             removed_pipelines: built.removed_pipelines.clone(),
                         };
                         let (tx, rx) = channel();
 
-                        hooks.pre_scene_swap();
+                        let scenebuild_time = precise_time_ns() - scenebuild_start_time;
+                        hooks.pre_scene_swap(scenebuild_time);
 
                         (Some(info), Some(tx), Some(rx))
                     }
                     _ => (None, None, None),
                 };
 
+                let sceneswap_start_time = precise_time_ns();
                 let has_resources_updates = !resource_updates.is_empty();
                 self.tx.send(SceneBuilderResult::Transaction {
                     document_id,
                     built_scene,
                     resource_updates,
                     frame_ops,
                     render,
                     result_tx,
                 }).unwrap();
 
                 let _ = self.api_tx.send(ApiMsg::WakeUp);
 
                 if let Some(pipeline_info) = pipeline_info {
                     // Block until the swap is done, then invoke the hook.
                     let swap_result = result_rx.unwrap().recv();
-                    self.hooks.as_ref().unwrap().post_scene_swap(pipeline_info);
+                    let sceneswap_time = precise_time_ns() - sceneswap_start_time;
+                    self.hooks.as_ref().unwrap().post_scene_swap(pipeline_info, sceneswap_time);
                     // Once the hook is done, allow the RB thread to resume
                     match swap_result {
                         Ok(SceneSwapResult::Complete(resume_tx)) => {
                             resume_tx.send(()).ok();
                         },
                         _ => (),
                     };
                 } else if has_resources_updates {
--- a/gfx/webrender/src/shade.rs
+++ b/gfx/webrender/src/shade.rs
@@ -346,21 +346,22 @@ fn create_prim_shader(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sColor1", TextureSampler::Color1),
                 ("sColor2", TextureSampler::Color2),
                 ("sDither", TextureSampler::Dither),
                 ("sCacheA8", TextureSampler::CacheA8),
                 ("sCacheRGBA8", TextureSampler::CacheRGBA8),
-                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
+                ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
                 ("sResourceCache", TextureSampler::ResourceCache),
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
-                ("sLocalClipRects", TextureSampler::LocalClipRects),
+                ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
+                ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
@@ -374,21 +375,22 @@ fn create_clip_shader(name: &'static str
 
     let program = device.create_program(name, &prefix, &desc::CLIP);
 
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
-                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
+                ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
                 ("sResourceCache", TextureSampler::ResourceCache),
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
-                ("sLocalClipRects", TextureSampler::LocalClipRects),
+                ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
+                ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
 
 
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -496,16 +496,17 @@ impl TextureCache {
         region_index: u16
     ) -> &mut TextureRegion {
         let texture_array = match (format, filter) {
             (ImageFormat::R8, TextureFilter::Linear) => &mut self.array_a8_linear,
             (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
             (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
             (ImageFormat::RGBAF32, _) |
             (ImageFormat::RG8, _) |
+            (ImageFormat::RGBAI32, _) |
             (ImageFormat::R8, TextureFilter::Nearest) |
             (ImageFormat::R8, TextureFilter::Trilinear) |
             (ImageFormat::BGRA8, TextureFilter::Trilinear) => unreachable!(),
         };
 
         &mut texture_array.regions[region_index as usize]
     }
 
@@ -720,16 +721,17 @@ impl TextureCache {
         uv_rect_kind: UvRectKind,
     ) -> Option<CacheEntry> {
         // Work out which cache it goes in, based on format.
         let texture_array = match (descriptor.format, filter) {
             (ImageFormat::R8, TextureFilter::Linear) => &mut self.array_a8_linear,
             (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
             (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
             (ImageFormat::RGBAF32, _) |
+            (ImageFormat::RGBAI32, _) |
             (ImageFormat::R8, TextureFilter::Nearest) |
             (ImageFormat::R8, TextureFilter::Trilinear) |
             (ImageFormat::BGRA8, TextureFilter::Trilinear) |
             (ImageFormat::RG8, _) => unreachable!(),
         };
 
         // Lazy initialize this texture array if required.
         if texture_array.texture_id.is_none() {
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -7,18 +7,17 @@ use api::{DeviceUintRect, DeviceUintSize
 use api::{MixBlendMode, PipelineId};
 use batch::{AlphaBatchBuilder, AlphaBatchContainer, ClipBatcher, resolve_image};
 use clip::{ClipStore};
 use clip_scroll_tree::{ClipScrollTree, ClipScrollNodeIndex};
 use device::{FrameId, Texture};
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use gpu_cache::{GpuCache};
-use gpu_types::{BorderInstance, BlurDirection, BlurInstance};
-use gpu_types::{ClipScrollNodeData, ZBufferIdGenerator};
+use gpu_types::{BorderInstance, BlurDirection, BlurInstance, PrimitiveHeaders, TransformData, TransformPalette};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveStore};
 use prim_store::{BrushKind, DeferredResolve};
 use profiler::FrameProfileCounters;
 use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree};
@@ -43,17 +42,17 @@ pub struct ScrollbarPrimitive {
 pub struct RenderTargetIndex(pub usize);
 
 pub struct RenderTargetContext<'a, 'rc> {
     pub device_pixel_scale: DevicePixelScale,
     pub prim_store: &'a PrimitiveStore,
     pub resource_cache: &'rc mut ResourceCache,
     pub clip_scroll_tree: &'a ClipScrollTree,
     pub use_dual_source_blending: bool,
-    pub node_data: &'a [ClipScrollNodeData],
+    pub transforms: &'a TransformPalette,
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct TextureAllocator {
     // TODO(gw): Replace this with a simpler allocator for
     // render target allocation - this use case doesn't need
     // to deal with coalescing etc that the general texture
@@ -98,16 +97,17 @@ pub trait RenderTarget {
     ) -> Self;
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
     fn build(
         &mut self,
         _ctx: &mut RenderTargetContext,
         _gpu_cache: &mut GpuCache,
         _render_tasks: &mut RenderTaskTree,
         _deferred_resolves: &mut Vec<DeferredResolve>,
+        _prim_headers: &mut PrimitiveHeaders,
     ) {
     }
     // TODO(gw): It's a bit odd that we need the deferred resolves and mutable
     //           GPU cache here. They are typically used by the build step
     //           above. They are used for the blit jobs to allow resolve_image
     //           to be called. It's a bit of extra overhead to store the image
     //           key here and the resolve them in the build step separately.
     //           BUT: if/when we add more texture cache target jobs, we might
@@ -161,22 +161,29 @@ impl<T: RenderTarget> RenderTargetList<T
 
     fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         saved_index: Option<SavedTargetIndex>,
+        prim_headers: &mut PrimitiveHeaders,
     ) {
         debug_assert_eq!(None, self.saved_index);
         self.saved_index = saved_index;
 
         for target in &mut self.targets {
-            target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
+            target.build(
+                ctx,
+                gpu_cache,
+                render_tasks,
+                deferred_resolves,
+                prim_headers,
+            );
         }
     }
 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
@@ -328,19 +335,19 @@ impl RenderTarget for ColorRenderTarget 
     }
 
     fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
+        prim_headers: &mut PrimitiveHeaders,
     ) {
         let mut merged_batches = AlphaBatchContainer::new(None);
-        let mut z_generator = ZBufferIdGenerator::new();
 
         for task_id in &self.alpha_tasks {
             let task = &render_tasks[*task_id];
 
             match task.kind {
                 RenderTaskKind::Picture(ref pic_task) => {
                     let brush_index = ctx.prim_store.cpu_metadata[pic_task.prim_index.0].cpu_prim_index;
                     let brush = &ctx.prim_store.cpu_brushes[brush_index.0];
@@ -353,17 +360,17 @@ impl RenderTarget for ColorRenderTarget 
 
                             batch_builder.add_pic_to_batch(
                                 pic,
                                 *task_id,
                                 ctx,
                                 gpu_cache,
                                 render_tasks,
                                 deferred_resolves,
-                                &mut z_generator,
+                                prim_headers,
                             );
 
                             if let Some(batch_container) = batch_builder.build(&mut merged_batches) {
                                 self.alpha_batch_containers.push(batch_container);
                             }
                         }
                         _ => {
                             unreachable!();
@@ -589,16 +596,17 @@ impl RenderTarget for AlphaRenderTarget 
                 let task_address = render_tasks.get_task_address(task_id);
                 self.clip_batcher.add(
                     task_address,
                     &task_info.clips,
                     task_info.coordinate_system_id,
                     ctx.resource_cache,
                     gpu_cache,
                     clip_store,
+                    ctx.transforms,
                 );
             }
             RenderTaskKind::ClipRegion(ref task) => {
                 let task_address = render_tasks.get_task_address(task_id);
                 self.clip_batcher.add_clip_region(
                     task_address,
                     task.clip_data_address,
                 );
@@ -790,33 +798,40 @@ impl RenderPass {
 
     pub fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         clip_store: &ClipStore,
+        prim_headers: &mut PrimitiveHeaders,
     ) {
         profile_scope!("RenderPass::build");
 
         match self.kind {
             RenderPassKind::MainFramebuffer(ref mut target) => {
                 for &task_id in &self.tasks {
                     assert_eq!(render_tasks[task_id].target_kind(), RenderTargetKind::Color);
                     target.add_task(
                         task_id,
                         ctx,
                         gpu_cache,
                         render_tasks,
                         clip_store,
                         deferred_resolves,
                     );
                 }
-                target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
+                target.build(
+                    ctx,
+                    gpu_cache,
+                    render_tasks,
+                    deferred_resolves,
+                    prim_headers,
+                );
             }
             RenderPassKind::OffScreen { ref mut color, ref mut alpha, ref mut texture_cache } => {
                 let is_shared_alpha = self.tasks.iter().any(|&task_id| {
                     let task = &render_tasks[task_id];
                     task.is_shared() &&
                         task.target_kind() == RenderTargetKind::Alpha
                 });
                 let saved_color = if self.tasks.iter().any(|&task_id| {
@@ -906,18 +921,32 @@ impl RenderPass {
                                     clip_store,
                                     deferred_resolves,
                                 ),
                             }
                         }
                     }
                 }
 
-                color.build(ctx, gpu_cache, render_tasks, deferred_resolves, saved_color);
-                alpha.build(ctx, gpu_cache, render_tasks, deferred_resolves, saved_alpha);
+                color.build(
+                    ctx,
+                    gpu_cache,
+                    render_tasks,
+                    deferred_resolves,
+                    saved_color,
+                    prim_headers,
+                );
+                alpha.build(
+                    ctx,
+                    gpu_cache,
+                    render_tasks,
+                    deferred_resolves,
+                    saved_alpha,
+                    prim_headers,
+                );
                 alpha.is_shared = is_shared_alpha;
             }
         }
     }
 }
 
 #[derive(Debug, Clone, Default)]
 pub struct CompositeOps {
@@ -951,19 +980,19 @@ pub struct Frame {
     pub inner_rect: DeviceUintRect,
     pub background_color: Option<ColorF>,
     pub layer: DocumentLayer,
     pub device_pixel_ratio: f32,
     pub passes: Vec<RenderPass>,
     #[cfg_attr(any(feature = "capture", feature = "replay"), serde(default = "FrameProfileCounters::new", skip))]
     pub profile_counters: FrameProfileCounters,
 
-    pub node_data: Vec<ClipScrollNodeData>,
-    pub clip_chain_local_clip_rects: Vec<LayoutRect>,
+    pub transform_palette: Vec<TransformData>,
     pub render_tasks: RenderTaskTree,
+    pub prim_headers: PrimitiveHeaders,
 
     /// The GPU cache frame that the contents of Self depend on
     pub gpu_cache_frame_id: FrameId,
 
     /// List of textures that we don't know about yet
     /// from the backend thread. The render thread
     /// will use a callback to resolve these and
     /// patch the data structures.
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -1098,14 +1098,14 @@ pub struct PropertyValue<T> {
 pub struct DynamicProperties {
     pub transforms: Vec<PropertyValue<LayoutTransform>>,
     pub floats: Vec<PropertyValue<f32>>,
 }
 
 pub trait RenderNotifier: Send {
     fn clone(&self) -> Box<RenderNotifier>;
     fn wake_up(&self);
-    fn new_frame_ready(&self, DocumentId, scrolled: bool, composite_needed: bool);
+    fn new_frame_ready(&self, DocumentId, scrolled: bool, composite_needed: bool, render_time_ns: Option<u64>);
     fn external_event(&self, _evt: ExternalEvent) {
         unimplemented!()
     }
     fn shut_down(&self) {}
 }
--- a/gfx/webrender_api/src/color.rs
+++ b/gfx/webrender_api/src/color.rs
@@ -110,17 +110,17 @@ pub struct ColorU {
     pub r: u8,
     pub g: u8,
     pub b: u8,
     pub a: u8,
 }
 
 impl ColorU {
     /// Constructs a new additive `ColorU` from its components.
-    pub fn new(r: u8, g: u8, b: u8, a: u8) -> ColorU {
+    pub fn new(r: u8, g: u8, b: u8, a: u8) -> Self {
         ColorU { r, g, b, a }
     }
 }
 
 fn round_to_int(x: f32) -> u8 {
     debug_assert!((0.0 <= x) && (x <= 1.0));
     let f = (255.0 * x) + 0.5;
     let val = f.floor();
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -268,32 +268,20 @@ pub struct NormalBorder {
 pub enum RepeatMode {
     Stretch,
     Repeat,
     Round,
     Space,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
-pub struct GradientBorder {
-    pub gradient: Gradient,
-    pub outset: SideOffsets2D<f32>,
-}
-
-#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
-pub struct RadialGradientBorder {
-    pub gradient: RadialGradient,
-    pub outset: SideOffsets2D<f32>,
-}
-
-#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
-/// TODO(mrobinson): Currently only images are supported, but we will
-/// eventually add support for Gradient and RadialGradient.
 pub enum NinePatchBorderSource {
     Image(ImageKey),
+    Gradient(Gradient),
+    RadialGradient(RadialGradient),
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct NinePatchBorder {
     /// Describes what to use as the 9-patch source image. If this is an image,
     /// it will be stretched to fill the size given by width x height.
     pub source: NinePatchBorderSource,
 
@@ -328,18 +316,16 @@ pub struct NinePatchBorder {
     /// TODO(mrobinson): This should be removed and handled by the client.
     pub outset: SideOffsets2D<f32>,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum BorderDetails {
     Normal(NormalBorder),
     NinePatch(NinePatchBorder),
-    Gradient(GradientBorder),
-    RadialGradient(RadialGradientBorder),
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct BorderDisplayItem {
     pub widths: BorderWidths,
     pub details: BorderDetails,
 }
 
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -22,18 +22,18 @@ use {ImageRendering, LayoutPoint, Layout
 use {LayoutVector2D, LineDisplayItem, LineOrientation, LineStyle, MixBlendMode, PipelineId};
 use {PropertyBinding, PushReferenceFrameDisplayListItem, PushStackingContextDisplayItem};
 use {RadialGradient, RadialGradientDisplayItem, RectangleDisplayItem, ReferenceFrame};
 use {ScrollFrameDisplayItem, ScrollSensitivity, Shadow, SpecificDisplayItem, StackingContext};
 use {StickyFrameDisplayItem, StickyOffsetBounds, TextDisplayItem, TransformStyle, YuvColorSpace};
 use {YuvData, YuvImageDisplayItem};
 
 // We don't want to push a long text-run. If a text-run is too long, split it into several parts.
-// This needs to be set to (renderer::MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_PRIM_HEADER - VECS_PER_TEXT_RUN) * 2
-pub const MAX_TEXT_RUN_LENGTH: usize = 2038;
+// This needs to be set to (renderer::MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_TEXT_RUN) * 2
+pub const MAX_TEXT_RUN_LENGTH: usize = 2040;
 
 // We start at 2, because the root reference is always 0 and the root scroll node is always 1.
 const FIRST_CLIP_ID: usize = 2;
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub struct ItemRange<T> {
     start: usize,
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -54,25 +54,27 @@ pub struct ExternalImageData {
 
 #[repr(u32)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ImageFormat {
     R8 = 1,
     BGRA8 = 3,
     RGBAF32 = 4,
     RG8 = 5,
+    RGBAI32 = 6,
 }
 
 impl ImageFormat {
     pub fn bytes_per_pixel(self) -> u32 {
         match self {
             ImageFormat::R8 => 1,
             ImageFormat::BGRA8 => 4,
             ImageFormat::RGBAF32 => 16,
             ImageFormat::RG8 => 2,
+            ImageFormat::RGBAI32 => 16,
         }
     }
 }
 
 #[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ImageDescriptor {
     pub format: ImageFormat,
     pub size: DeviceUintSize,
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-cf98ad4d63729c678a7575eb9bce36794da5e270
+cdfaaeb5f74e416f39af1081c9a676c752d23896
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -88,16 +88,17 @@ enum class FontRenderMode : uint32_t {
   Sentinel /* this must be last for serialization purposes. */
 };
 
 enum class ImageFormat : uint32_t {
   R8 = 1,
   BGRA8 = 3,
   RGBAF32 = 4,
   RG8 = 5,
+  RGBAI32 = 6,
 
   Sentinel /* this must be last for serialization purposes. */
 };
 
 enum class ImageRendering : uint32_t {
   Auto = 0,
   CrispEdges = 1,
   Pixelated = 2,
--- a/gfx/wrench/Cargo.toml
+++ b/gfx/wrench/Cargo.toml
@@ -9,17 +9,17 @@ license = "MPL-2.0"
 base64 = "0.6"
 bincode = "1.0"
 byteorder = "1.0"
 env_logger = { version = "0.5", optional = true }
 euclid = "0.17"
 gleam = "0.5"
 glutin = "0.15"
 app_units = "0.6"
-image = "0.18"
+image = "0.19"
 clap = { version = "2", features = ["yaml"] }
 lazy_static = "1"
 log = "0.4"
 yaml-rust = { git = "https://github.com/vvuk/yaml-rust", features = ["preserve_order"] }
 serde_json = "1.0"
 ron = "0.1.5"
 time = "0.1"
 crossbeam = "0.2"
--- a/gfx/wrench/src/args.yaml
+++ b/gfx/wrench/src/args.yaml
@@ -62,16 +62,20 @@ args:
       long: vsync
       help: Enable vsync for OpenGL window
   - no_scissor:
       long: no-scissor
       help: Disable scissors when clearing render targets
   - no_batch:
       long: no-batch
       help: Disable batching of instanced draw calls
+  - chase:
+      long: chase
+      help: Chase a particular primitive matching the local rect (x,y,width,height)
+      takes_value: true
 
 subcommands:
     - png:
         about: render frame described by YAML and save it to a png file
         args:
           - surface:
               short: s
               long: surface
--- a/gfx/wrench/src/main.rs
+++ b/gfx/wrench/src/main.rs
@@ -252,17 +252,17 @@ fn make_window(
         Some(ref events_loop) => {
             let context_builder = glutin::ContextBuilder::new()
                 .with_gl(glutin::GlRequest::GlThenGles {
                     opengl_version: (3, 2),
                     opengles_version: (3, 0),
                 })
                 .with_vsync(vsync);
             let window_builder = winit::WindowBuilder::new()
-                .with_title("WRech")
+                .with_title("WRench")
                 .with_multitouch()
                 .with_dimensions(size.width, size.height);
 
             let init = |context: &glutin::GlContext| {
                 unsafe {
                     context
                         .make_current()
                         .expect("unable to make context current!");
@@ -346,17 +346,21 @@ impl RenderNotifier for Notifier {
     fn wake_up(&self) {
         self.tx.send(NotifierEvent::WakeUp).unwrap();
     }
 
     fn shut_down(&self) {
         self.tx.send(NotifierEvent::ShutDown).unwrap();
     }
 
-    fn new_frame_ready(&self, _: DocumentId, _scrolled: bool, composite_needed: bool) {
+    fn new_frame_ready(&self,
+                       _: DocumentId,
+                       _scrolled: bool,
+                       composite_needed: bool,
+                       _render_time: Option<u64>) {
         if composite_needed {
             self.wake_up();
         }
     }
 }
 
 fn create_notifier() -> (Box<RenderNotifier>, Receiver<NotifierEvent>) {
     let (tx, rx) = channel();
@@ -419,16 +423,30 @@ fn main() {
                 "Size must be specified exactly as 720p, 1080p, 4k, or width x height",
             );
             let w = s[0 .. x].parse::<u32>().expect("Invalid size width");
             let h = s[x + 1 ..].parse::<u32>().expect("Invalid size height");
             DeviceUintSize::new(w, h)
         })
         .unwrap_or(DeviceUintSize::new(1920, 1080));
     let zoom_factor = args.value_of("zoom").map(|z| z.parse::<f32>().unwrap());
+    // `--chase` takes the primitive's local rect as "x,y,width,height".
+    let chase_primitive = match args.value_of("chase") {
+        Some(s) => {
+            let items: Vec<f32> = s.split(',')
+                .map(|v| v.trim().parse::<f32>().expect("Invalid chase rect value"))
+                .collect();
+            assert_eq!(items.len(), 4, "Chase rect must be given as x,y,width,height");
+            webrender::ChasePrimitive::LocalRect(LayoutRect::new(
+                LayoutPoint::new(items[0], items[1]),
+                LayoutSize::new(items[2], items[3]),
+            ))
+        },
+        None => webrender::ChasePrimitive::Nothing,
+    };
 
     let mut events_loop = if args.is_present("headless") {
         None
     } else {
         Some(winit::EventsLoop::new())
     };
 
     let mut window = make_window(
@@ -457,16 +475,17 @@ fn main() {
         args.is_present("rebuild"),
         args.is_present("no_subpixel_aa"),
         args.is_present("verbose"),
         args.is_present("no_scissor"),
         args.is_present("no_batch"),
         args.is_present("precache"),
         args.is_present("slow_subpixel"),
         zoom_factor.unwrap_or(1.0),
+        chase_primitive,
         notifier,
     );
 
     if let Some(subargs) = args.subcommand_matches("show") {
         render(&mut wrench, &mut window, size, &mut events_loop, subargs);
     } else if let Some(subargs) = args.subcommand_matches("png") {
         let surface = match subargs.value_of("surface") {
             Some("screen") | None => png::ReadSurface::Screen,
@@ -629,27 +648,40 @@ fn render<'a>(
                         let file_name = format!("profile-{}.json", cpu_profile_index);
                         wrench.renderer.save_cpu_profile(&file_name);
                         cpu_profile_index += 1;
                     }
                     VirtualKeyCode::C => {
                         let path = PathBuf::from("../captures/wrench");
                         wrench.api.save_capture(path, CaptureBits::all());
                     }
-                    VirtualKeyCode::Up => {
+                    VirtualKeyCode::Up | VirtualKeyCode::Down => {
+                        let mut txn = Transaction::new();
+
+                        let offset = match vk {
+                            winit::VirtualKeyCode::Up => LayoutVector2D::new(0.0, 10.0),
+                            winit::VirtualKeyCode::Down => LayoutVector2D::new(0.0, -10.0),
+                            _ => unreachable!("Should not see non directional keys here.")
+                        };
+
+                        txn.scroll(ScrollLocation::Delta(offset), cursor_position);
+                        txn.generate_frame();
+                        wrench.api.send_transaction(wrench.document_id, txn);
+
+                        do_frame = true;
+                    }
+                    VirtualKeyCode::Add => {
                         let current_zoom = wrench.get_page_zoom();
                         let new_zoom_factor = ZoomFactor::new(current_zoom.get() + 0.1);
-
                         wrench.set_page_zoom(new_zoom_factor);
                         do_frame = true;
                     }
-                    VirtualKeyCode::Down => {
+                    VirtualKeyCode::Subtract => {
                         let current_zoom = wrench.get_page_zoom();
                         let new_zoom_factor = ZoomFactor::new((current_zoom.get() - 0.1).max(0.1));
-
                         wrench.set_page_zoom(new_zoom_factor);
                         do_frame = true;
                     }
                     VirtualKeyCode::X => {
                         let results = wrench.api.hit_test(
                             wrench.document_id,
                             None,
                             cursor_position,
--- a/gfx/wrench/src/wrench.rs
+++ b/gfx/wrench/src/wrench.rs
@@ -104,17 +104,20 @@ impl RenderNotifier for Notifier {
     fn clone(&self) -> Box<RenderNotifier> {
         Box::new(Notifier(self.0.clone()))
     }
 
     fn wake_up(&self) {
         self.update(false);
     }
 
-    fn new_frame_ready(&self, _: DocumentId, scrolled: bool, _composite_needed: bool) {
+    fn new_frame_ready(&self, _: DocumentId,
+                       scrolled: bool,
+                       _composite_needed: bool,
+                       _render_time: Option<u64>) {
         self.update(!scrolled);
     }
 }
 
 pub trait WrenchThing {
     fn next_frame(&mut self);
     fn prev_frame(&mut self);
     fn do_frame(&mut self, &mut Wrench) -> u32;
@@ -175,16 +178,17 @@ impl Wrench {
         do_rebuild: bool,
         no_subpixel_aa: bool,
         verbose: bool,
         no_scissor: bool,
         no_batch: bool,
         precache_shaders: bool,
         disable_dual_source_blending: bool,
         zoom_factor: f32,
+        chase_primitive: webrender::ChasePrimitive,
         notifier: Option<Box<RenderNotifier>>,
     ) -> Self {
         println!("Shader override path: {:?}", shader_override_path);
 
         let recorder = save_type.map(|save_type| match save_type {
             SaveType::Yaml => Box::new(
                 YamlFrameWriterReceiver::new(&PathBuf::from("yaml_frames")),
             ) as Box<webrender::ApiRecordingReceiver>,
@@ -207,16 +211,17 @@ impl Wrench {
             recorder,
             enable_subpixel_aa: !no_subpixel_aa,
             debug_flags,
             enable_clear_scissor: !no_scissor,
             max_recorded_profiles: 16,
             precache_shaders,
             blob_image_renderer: Some(Box::new(blob::CheckerboardRenderer::new(callbacks.clone()))),
             disable_dual_source_blending,
+            chase_primitive,
             ..Default::default()
         };
 
         // put an Awakened event into the queue to kick off the first frame
         if let Some(ref elp) = proxy {
             #[cfg(not(target_os = "android"))]
             let _ = elp.wakeup();
         }
--- a/gfx/wrench/src/yaml_frame_reader.rs
+++ b/gfx/wrench/src/yaml_frame_reader.rs
@@ -168,17 +168,18 @@ fn is_image_opaque(format: ImageFormat, 
                     is_opaque = false;
                     break;
                 }
             }
             is_opaque
         }
         ImageFormat::RG8 => true,
         ImageFormat::R8 => false,
-        ImageFormat::RGBAF32 => unreachable!(),
+        ImageFormat::RGBAF32 |
+        ImageFormat::RGBAI32 => unreachable!(),
     }
 }
 
 pub struct YamlFrameReader {
     frame_built: bool,
     yaml_path: PathBuf,
     aux_dir: PathBuf,
     frame_count: u32,
@@ -875,86 +876,85 @@ impl YamlFrameReader {
                     Some(BorderDetails::Normal(NormalBorder {
                         top,
                         left,
                         bottom,
                         right,
                         radius,
                     }))
                 }
-                "image" => {
-                    let file = rsrc_path(&item["image-source"], &self.aux_dir);
-                    let (image_key, _) = self
-                        .add_or_get_image(&file, None, wrench);
+                "image" | "gradient" | "radial-gradient" => {
                     let image_width = item["image-width"]
                         .as_i64()
-                        .expect("border must have image-width");
+                        .unwrap_or(info.rect.size.width as i64);
                     let image_height = item["image-height"]
                         .as_i64()
-                        .expect("border must have image-height");
+                        .unwrap_or(info.rect.size.height as i64);
                     let fill = item["fill"].as_bool().unwrap_or(false);
-                    let slice = item["slice"].as_vec_u32().expect("border must have slice");
-                    let slice = broadcast(&slice, 4);
+
+                    let slice = item["slice"].as_vec_u32(); // explicit slice, if the YAML provides one
+                    let slice = match slice {
+                        Some(slice) => broadcast(&slice, 4),
+                        None => vec![widths.top as u32, widths.right as u32, widths.bottom as u32, widths.left as u32], // default to border widths, in the (top, right, bottom, left) order SideOffsets2D::new takes
+                    };
+
                     let outset = item["outset"]
                         .as_vec_f32()
                         .expect("border must have outset");
                     let outset = broadcast(&outset, 4);
                     let repeat_horizontal = match item["repeat-horizontal"]
                         .as_str()
-                        .expect("border must have repeat-horizontal")
+                        .unwrap_or("stretch")
                     {
                         "stretch" => RepeatMode::Stretch,
                         "repeat" => RepeatMode::Repeat,
                         "round" => RepeatMode::Round,
                         "space" => RepeatMode::Space,
                         s => panic!("Unknown box border image repeat mode {}", s),
                     };
                     let repeat_vertical = match item["repeat-vertical"]
                         .as_str()
-                        .expect("border must have repeat-vertical")
+                        .unwrap_or("stretch")
                     {
                         "stretch" => RepeatMode::Stretch,
                         "repeat" => RepeatMode::Repeat,
                         "round" => RepeatMode::Round,
                         "space" => RepeatMode::Space,
                         s => panic!("Unknown box border image repeat mode {}", s),
                     };
+                    let source = match border_type {
+                        "image" => {
+                            let file = rsrc_path(&item["image-source"], &self.aux_dir);
+                            let (image_key, _) = self
+                                .add_or_get_image(&file, None, wrench);
+                            NinePatchBorderSource::Image(image_key)
+                        }
+                        "gradient" => {
+                            let gradient = self.to_gradient(dl, item);
+                            NinePatchBorderSource::Gradient(gradient)
+                        }
+                        "radial-gradient" => {
+                            let gradient = self.to_radial_gradient(dl, item);
+                            NinePatchBorderSource::RadialGradient(gradient)
+
+                        }
+                        _ => unreachable!("Unexpected border type"),
+                    };
+
                     Some(BorderDetails::NinePatch(NinePatchBorder {
-                        source: NinePatchBorderSource::Image(image_key),
+                        source,
                         width: image_width as u32,
                         height: image_height as u32,
                         slice: SideOffsets2D::new(slice[0], slice[1], slice[2], slice[3]),
                         fill,
                         repeat_horizontal,
                         repeat_vertical,
                         outset: SideOffsets2D::new(outset[0], outset[1], outset[2], outset[3]),
                     }))
                 }
-                "gradient" => {
-                    let gradient = self.to_gradient(dl, item);
-                    let outset = item["outset"]
-                        .as_vec_f32()
-                        .expect("borders must have outset");
-                    let outset = broadcast(&outset, 4);
-                    Some(BorderDetails::Gradient(GradientBorder {
-                        gradient,
-                        outset: SideOffsets2D::new(outset[0], outset[1], outset[2], outset[3]),
-                    }))
-                }
-                "radial-gradient" => {
-                    let gradient = self.to_radial_gradient(dl, item);
-                    let outset = item["outset"]
-                        .as_vec_f32()
-                        .expect("borders must have outset");
-                    let outset = broadcast(&outset, 4);
-                    Some(BorderDetails::RadialGradient(RadialGradientBorder {
-                        gradient,
-                        outset: SideOffsets2D::new(outset[0], outset[1], outset[2], outset[3]),
-                    }))
-                }
                 _ => {
                     println!("Unable to parse border {:?}", item);
                     None
                 }
             }
         } else {
             println!("Unable to parse border {:?}", item);
             None
@@ -1386,17 +1386,17 @@ impl YamlFrameReader {
         });
 
         let real_id = dl.define_scroll_frame(
             external_id,
             content_rect,
             clip_rect,
             complex_clips,
             image_mask,
-            ScrollSensitivity::Script,
+            ScrollSensitivity::ScriptAndInputEvents,
         );
         if let Some(numeric_id) = numeric_id {
             self.add_clip_id_mapping(numeric_id, real_id);
         }
 
         if !yaml["items"].is_badvalue() {
             dl.push_clip_id(real_id);
             self.add_display_list_items_from_yaml(dl, wrench, &yaml["items"]);
--- a/gfx/wrench/src/yaml_frame_writer.rs
+++ b/gfx/wrench/src/yaml_frame_writer.rs
@@ -880,24 +880,45 @@ impl YamlFrameWriter {
                             ];
                             let outset: Vec<f32> = vec![
                                 details.outset.top,
                                 details.outset.right,
                                 details.outset.bottom,
                                 details.outset.left,
                             ];
                             yaml_node(&mut v, "width", f32_vec_yaml(&widths, true));
-                            str_node(&mut v, "border-type", "image");
 
                             match details.source {
                                 NinePatchBorderSource::Image(image_key) => {
+                                    str_node(&mut v, "border-type", "image");
                                     if let Some(path) = self.path_for_image(image_key) {
                                         path_node(&mut v, "image", &path);
                                     }
                                 }
+                                NinePatchBorderSource::Gradient(gradient) => {
+                                    str_node(&mut v, "border-type", "gradient"); // same key the image/radial-gradient arms emit
+                                    point_node(&mut v, "start", &gradient.start_point);
+                                    point_node(&mut v, "end", &gradient.end_point);
+                                    let mut stops = vec![]; // flat list: offset, color, offset, color, ...
+                                    for stop in display_list.get(base.gradient_stops()) {
+                                        stops.push(Yaml::Real(stop.offset.to_string()));
+                                        stops.push(Yaml::String(color_to_string(stop.color)));
+                                    }
+                                    yaml_node(&mut v, "stops", Yaml::Array(stops));
+                                    bool_node(&mut v, "repeat", gradient.extend_mode == ExtendMode::Repeat);
+                                }
+                                NinePatchBorderSource::RadialGradient(gradient) => {
+                                    str_node(&mut v, "border-type", "radial-gradient");
+                                    radial_gradient_to_yaml(
+                                        &mut v,
+                                        &gradient,
+                                        base.gradient_stops(),
+                                        display_list
+                                    );
+                                }
                             }
 
                             u32_node(&mut v, "image-width", details.width);
                             u32_node(&mut v, "image-height", details.height);
                             let slice: Vec<u32> = vec![
                                 details.slice.top,
                                 details.slice.right,
                                 details.slice.bottom,
@@ -919,69 +940,16 @@ impl YamlFrameWriter {
                                 RepeatMode::Stretch => {
                                     str_node(&mut v, "repeat-vertical", "stretch")
                                 }
                                 RepeatMode::Repeat => str_node(&mut v, "repeat-vertical", "repeat"),
                                 RepeatMode::Round => str_node(&mut v, "repeat-vertical", "round"),
                                 RepeatMode::Space => str_node(&mut v, "repeat-vertical", "space"),
                             };
                         }
-                        BorderDetails::Gradient(ref details) => {
-                            let widths: Vec<f32> = vec![
-                                item.widths.top,
-                                item.widths.right,
-                                item.widths.bottom,
-                                item.widths.left,
-                            ];
-                            let outset: Vec<f32> = vec![
-                                details.outset.top,
-                                details.outset.right,
-                                details.outset.bottom,
-                                details.outset.left,
-                            ];
-                            yaml_node(&mut v, "width", f32_vec_yaml(&widths, true));
-                            str_node(&mut v, "border-type", "gradient");
-                            point_node(&mut v, "start", &details.gradient.start_point);
-                            point_node(&mut v, "end", &details.gradient.end_point);
-                            let mut stops = vec![];
-                            for stop in display_list.get(base.gradient_stops()) {
-                                stops.push(Yaml::Real(stop.offset.to_string()));
-                                stops.push(Yaml::String(color_to_string(stop.color)));
-                            }
-                            yaml_node(&mut v, "stops", Yaml::Array(stops));
-                            bool_node(
-                                &mut v,
-                                "repeat",
-                                details.gradient.extend_mode == ExtendMode::Repeat,
-                            );
-                            yaml_node(&mut v, "outset", f32_vec_yaml(&outset, true));
-                        }
-                        BorderDetails::RadialGradient(ref details) => {
-                            let widths: Vec<f32> = vec![
-                                item.widths.top,
-                                item.widths.right,
-                                item.widths.bottom,
-                                item.widths.left,
-                            ];
-                            let outset: Vec<f32> = vec![
-                                details.outset.top,
-                                details.outset.right,
-                                details.outset.bottom,
-                                details.outset.left,
-                            ];
-                            yaml_node(&mut v, "width", f32_vec_yaml(&widths, true));
-                            str_node(&mut v, "border-type", "radial-gradient");
-                            yaml_node(&mut v, "outset", f32_vec_yaml(&outset, true));
-                            radial_gradient_to_yaml(
-                                &mut v,
-                                &details.gradient,
-                                base.gradient_stops(),
-                                display_list
-                            );
-                        }
                     }
                 }
                 BoxShadow(item) => {
                     str_node(&mut v, "type", "box-shadow");
                     rect_node(&mut v, "box-bounds", &item.box_bounds);
                     vector_node(&mut v, "offset", &item.offset);
                     color_node(&mut v, "color", item.color);
                     f32_node(&mut v, "blur-radius", item.blur_radius);