Bug 1414271 - Update webrender to commit 34f1e8ed19a19cb950deef89ee31c1cf3d442d22. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Tue, 07 Nov 2017 08:31:57 -0500
changeset 694165 fd8935d6615ba62972894a797f6354018f740c47
parent 694164 7851d6768dfd9fe5568d1315a98f142d9bb9234f
child 694166 d318e01aed71bc0deb2ee68e3f154e24051218d0
push id 88062
push user kgupta@mozilla.com
push date Tue, 07 Nov 2017 13:38:30 +0000
reviewers jrmuizel
bugs 1414271
milestone 58.0a1
Bug 1414271 - Update webrender to commit 34f1e8ed19a19cb950deef89ee31c1cf3d442d22. r?jrmuizel

Also includes regenerated FFI header.

MozReview-Commit-ID: BwH36DHIvrc
gfx/doc/README.webrender
gfx/webrender/examples/scrolling.rs
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_border.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_hardware_composite.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/record.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_bindings/webrender_ffi_generated.h
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-fae962bfd6e1997f4b921ee93c3c1cc5abca3256
+34f1e8ed19a19cb950deef89ee31c1cf3d442d22
--- a/gfx/webrender/examples/scrolling.rs
+++ b/gfx/webrender/examples/scrolling.rs
@@ -106,17 +106,18 @@ impl Example for App {
             // at a margin of 10px from the bottom, for 40 pixels of scrolling,
             // and once at a margin of 10px from the top, for 60 pixels of
             // scrolling.
             let sticky_id = builder.define_sticky_frame(
                 None,
                 (50, 350).by(50, 50),
                 SideOffsets2D::new(Some(10.0), None, Some(10.0), None),
                 StickyOffsetBounds::new(-40.0, 60.0),
-                StickyOffsetBounds::new(0.0, 0.0)
+                StickyOffsetBounds::new(0.0, 0.0),
+                LayoutVector2D::new(0.0, 0.0)
             );
 
             builder.push_clip_id(sticky_id);
             let info = LayoutPrimitiveInfo::new((50, 350).by(50, 50));
             builder.push_rect(&info, ColorF::new(0.5, 0.5, 1.0, 1.0));
             builder.pop_clip_id(); // sticky_id
 
             // just for good measure add another teal square further down and to
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -14,29 +14,31 @@ void brush_vs(
 // Whether this brush is being drawn on a Picture
 // task (new) or an alpha batch task (legacy).
 // Can be removed once everything uses pictures.
 #define BRUSH_FLAG_USES_PICTURE     (1 << 0)
 
 struct BrushInstance {
     int picture_address;
     int prim_address;
-    int layer_address;
+    int clip_node_id;
+    int scroll_node_id;
     int clip_address;
     int z;
     int flags;
     ivec2 user_data;
 };
 
 BrushInstance load_brush() {
 	BrushInstance bi;
 
     bi.picture_address = aData0.x;
     bi.prim_address = aData0.y;
-    bi.layer_address = aData0.z;
+    bi.clip_node_id = aData0.z / 65536;
+    bi.scroll_node_id = aData0.z % 65536;
     bi.clip_address = aData0.w;
     bi.z = aData1.x;
     bi.flags = aData1.y;
     bi.user_data = aData1.zw;
 
     return bi;
 }
 
@@ -61,17 +63,17 @@ void main(void) {
         // Right now - pictures only support local positions. In the future, this
         // will be expanded to support transform picture types (the common kind).
         device_pos = pic_task.target_rect.p0 + uDevicePixelRatio * (local_pos - pic_task.content_origin);
 
         // Write the final position transformed by the orthographic device-pixel projection.
         gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
     } else {
         AlphaBatchTask alpha_task = fetch_alpha_batch_task(brush.picture_address);
-        Layer layer = fetch_layer(brush.layer_address);
+        Layer layer = fetch_layer(brush.clip_node_id, brush.scroll_node_id);
         ClipArea clip_area = fetch_clip_area(brush.clip_address);
 
         // Write the normal vertex information out.
         // TODO(gw): Support transform types in brushes. For now,
         //           the old cache image shader didn't support
         //           them yet anyway, so we're not losing any
         //           existing functionality.
         VertexInfo vi = write_vertex(
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -2,16 +2,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared,brush
 
 varying vec3 vUv;
 flat varying int vImageKind;
 flat varying vec4 vUvBounds;
+flat varying vec4 vUvBounds_NoClamp;
 flat varying vec4 vParams;
 
 #if defined WR_FEATURE_ALPHA_TARGET
 flat varying vec4 vColor;
 #endif
 
 #define BRUSH_IMAGE_SIMPLE      0
 #define BRUSH_IMAGE_NINEPATCH   1
@@ -37,18 +38,18 @@ void brush_vs(
 #if defined WR_FEATURE_COLOR_TARGET
     vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
 #else
     vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
     vColor = task.color;
 #endif
 
     vec2 uv0 = task.target_rect.p0;
-    vec2 src_size = task.target_rect.size;
-    vec2 uv1 = uv0 + src_size;
+    vec2 src_size = task.target_rect.size * task.scale_factor;
+    vec2 uv1 = uv0 + task.target_rect.size;
 
     // TODO(gw): In the future we'll probably draw these as segments
     //           with the brush shader. When that occurs, we can
     //           modify the UVs for each segment in the VS, and the
     //           FS can become a simple shader that doesn't need
     //           to adjust the UVs.
 
     switch (vImageKind) {
@@ -69,44 +70,47 @@ void brush_vs(
             vec2 local_src_size = src_size / uDevicePixelRatio;
             vUv.xy = (local_pos - local_rect.p0) / local_src_size;
             vParams.xy = 0.5 * local_rect.size / local_src_size;
             break;
         }
     }
 
     vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
+    vUvBounds_NoClamp = vec4(uv0, uv1) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 vec4 brush_fs() {
     vec2 uv;
 
     switch (vImageKind) {
         case BRUSH_IMAGE_SIMPLE: {
             uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
             break;
         }
         case BRUSH_IMAGE_NINEPATCH: {
             uv = clamp(vUv.xy, vec2(0.0), vParams.xy);
             uv += max(vec2(0.0), vUv.xy - vParams.zw);
-            uv = mix(vUvBounds.xy, vUvBounds.zw, uv);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
             break;
         }
         case BRUSH_IMAGE_MIRROR: {
             // Mirror and stretch the box shadow corner over the entire
             // primitives.
             uv = vParams.xy - abs(vUv.xy - vParams.xy);
 
             // Ensure that we don't fetch texels outside the box
             // shadow corner. This can happen, for example, when
             // drawing the outer parts of an inset box shadow.
             uv = clamp(uv, vec2(0.0), vec2(1.0));
-            uv = mix(vUvBounds.xy, vUvBounds.zw, uv);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
             break;
         }
     }
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec4 color = texture(sColor0, vec3(uv, vUv.z));
 #else
     vec4 color = vColor * texture(sColor1, vec3(uv, vUv.z)).r;
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -47,20 +47,16 @@ RectWithSize intersect_rect(RectWithSize
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 ClipVertexInfo write_clip_tile_vertex(RectWithSize local_clip_rect,
                                       Layer layer,
                                       ClipArea area,
                                       int segment) {
-
-    RectWithSize clipped_local_rect = intersect_rect(local_clip_rect,
-                                                     layer.local_clip_rect);
-
     vec2 outer_p0 = area.screen_origin_target_index.xy;
     vec2 outer_p1 = outer_p0 + area.task_bounds.zw - area.task_bounds.xy;
     vec2 inner_p0 = area.inner_rect.xy;
     vec2 inner_p1 = area.inner_rect.zw;
 
     vec2 p0, p1;
     switch (segment) {
         case SEGMENT_ALL:
@@ -89,15 +85,15 @@ ClipVertexInfo write_clip_tile_vertex(Re
 
     vec4 layer_pos = get_layer_pos(actual_pos / uDevicePixelRatio, layer);
 
     // compute the point position in side the layer, in CSS space
     vec2 vertex_pos = actual_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
 
     gl_Position = uTransform * vec4(vertex_pos, 0.0, 1);
 
-    vLocalBounds = vec4(clipped_local_rect.p0, clipped_local_rect.p0 + clipped_local_rect.size);
+    vLocalBounds = vec4(local_clip_rect.p0, local_clip_rect.p0 + local_clip_rect.size);
 
-    ClipVertexInfo vi = ClipVertexInfo(layer_pos.xyw, actual_pos, clipped_local_rect);
+    ClipVertexInfo vi = ClipVertexInfo(layer_pos.xyw, actual_pos, local_clip_rect);
     return vi;
 }
 
 #endif //WR_VERTEX_SHADER
--- a/gfx/webrender/res/cs_clip_border.glsl
+++ b/gfx/webrender/res/cs_clip_border.glsl
@@ -59,17 +59,17 @@ struct BorderClipDot {
 BorderClipDot fetch_border_clip_dot(ivec2 address, int segment) {
     vec4 data = fetch_from_resource_cache_1_direct(address + ivec2(2 + (segment - 1), 0));
     return BorderClipDot(data.xyz);
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    Layer layer = fetch_layer(cmi.layer_address);
+    Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
 
     // Fetch the header information for this corner clip.
     BorderCorner corner = fetch_border_corner(cmi.clip_data_address);
     vClipCenter = corner.clip_center;
 
     if (cmi.segment == 0) {
         // The first segment is used to zero out the border corner.
         vAlphaMask = vec2(0.0);
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -19,17 +19,17 @@ ImageMaskData fetch_mask_data(ivec2 addr
     RectWithSize local_rect = RectWithSize(data.xy, data.zw);
     ImageMaskData mask_data = ImageMaskData(local_rect);
     return mask_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    Layer layer = fetch_layer(cmi.layer_address);
+    Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
     ImageMaskData mask = fetch_mask_data(cmi.clip_data_address);
     RectWithSize local_rect = mask.local_rect;
     ImageResource res = fetch_image_resource_direct(cmi.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                layer,
                                                area,
                                                cmi.segment);
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -53,17 +53,17 @@ ClipData fetch_clip(ivec2 address) {
     clip.bottom_right = fetch_clip_corner(address, 3.0);
 
     return clip;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    Layer layer = fetch_layer(cmi.layer_address);
+    Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
     ClipData clip = fetch_clip(cmi.clip_data_address);
     RectWithSize local_rect = clip.rect.rect;
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                layer,
                                                area,
                                                cmi.segment);
     vPos = vi.local_pos;
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -66,24 +66,24 @@ vec4[2] fetch_from_resource_cache_2(int 
     return vec4[2](
         TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
         TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0))
     );
 }
 
 #ifdef WR_VERTEX_SHADER
 
-#define VECS_PER_LAYER              9
+#define VECS_PER_LAYER              10
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_HEADER        2
 #define VECS_PER_TEXT_RUN           3
 #define VECS_PER_GRADIENT           3
 #define VECS_PER_GRADIENT_STOP      2
 
-uniform HIGHP_SAMPLER_FLOAT sampler2D sLayers;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sClipScrollNodes;
 uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
 
 // Instanced attributes
 in ivec4 aData0;
 in ivec4 aData1;
 
 // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.
 // TODO: convert back to a function once the driver issues are resolved, if ever.
@@ -138,45 +138,75 @@ vec4 fetch_from_resource_cache_1_direct(
     return texelFetch(sResourceCache, address, 0);
 }
 
 vec4 fetch_from_resource_cache_1(int address) {
     ivec2 uv = get_resource_cache_uv(address);
     return texelFetch(sResourceCache, uv, 0);
 }
 
-struct Layer {
+struct ClipScrollNode {
     mat4 transform;
     mat4 inv_transform;
-    RectWithSize local_clip_rect;
+    vec4 local_clip_rect;
+    vec2 reference_frame_relative_scroll_offset;
+    vec2 scroll_offset;
 };
 
-Layer fetch_layer(int index) {
-    Layer layer;
+ClipScrollNode fetch_clip_scroll_node(int index) {
+    ClipScrollNode node;
 
     // Create a UV base coord for each 8 texels.
     // This is required because trying to use an offset
     // of more than 8 texels doesn't work on some versions
     // of OSX.
     ivec2 uv = get_fetch_uv(index, VECS_PER_LAYER);
     ivec2 uv0 = ivec2(uv.x + 0, uv.y);
     ivec2 uv1 = ivec2(uv.x + 8, uv.y);
 
-    layer.transform[0] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(0, 0));
-    layer.transform[1] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(1, 0));
-    layer.transform[2] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(2, 0));
-    layer.transform[3] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(3, 0));
+    node.transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(0, 0));
+    node.transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(1, 0));
+    node.transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(2, 0));
+    node.transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(3, 0));
+
+    node.inv_transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(4, 0));
+    node.inv_transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(5, 0));
+    node.inv_transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(6, 0));
+    node.inv_transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(7, 0));
+
+    vec4 clip_rect = TEXEL_FETCH(sClipScrollNodes, uv1, 0, ivec2(0, 0));
+    node.local_clip_rect = clip_rect;
+
+    vec4 offsets = TEXEL_FETCH(sClipScrollNodes, uv1, 0, ivec2(1, 0));
+    node.reference_frame_relative_scroll_offset = offsets.xy;
+    node.scroll_offset = offsets.zw;
+
+    return node;
+}
 
-    layer.inv_transform[0] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(4, 0));
-    layer.inv_transform[1] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(5, 0));
-    layer.inv_transform[2] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(6, 0));
-    layer.inv_transform[3] = TEXEL_FETCH(sLayers, uv0, 0, ivec2(7, 0));
+struct Layer {
+    mat4 transform;
+    mat4 inv_transform;
+    RectWithSize local_clip_rect;
+};
+
+Layer fetch_layer(int clip_node_id, int scroll_node_id) {
+    ClipScrollNode clip_node = fetch_clip_scroll_node(clip_node_id);
+    ClipScrollNode scroll_node = fetch_clip_scroll_node(scroll_node_id);
 
-    vec4 clip_rect = TEXEL_FETCH(sLayers, uv1, 0, ivec2(0, 0));
-    layer.local_clip_rect = RectWithSize(clip_rect.xy, clip_rect.zw);
+    Layer layer;
+    layer.transform = scroll_node.transform;
+    layer.inv_transform = scroll_node.inv_transform;
+
+    vec4 local_clip_rect = clip_node.local_clip_rect;
+    local_clip_rect.xy += clip_node.reference_frame_relative_scroll_offset;
+    local_clip_rect.xy -= scroll_node.reference_frame_relative_scroll_offset;
+    local_clip_rect.xy -= scroll_node.scroll_offset;
+
+    layer.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
 
     return layer;
 }
 
 struct RenderTaskData {
     vec4 data0;
     vec4 data1;
     vec4 data2;
@@ -223,26 +253,28 @@ PictureTask fetch_picture_task(int addre
 
     return task;
 }
 
 struct BlurTask {
     RectWithSize target_rect;
     float render_target_layer_index;
     float blur_radius;
+    float scale_factor;
     vec4 color;
 };
 
 BlurTask fetch_blur_task(int address) {
     RenderTaskData task_data = fetch_render_task(address);
 
     return BlurTask(
         RectWithSize(task_data.data0.xy, task_data.data0.zw),
         task_data.data1.x,
         task_data.data1.y,
+        task_data.data1.z,
         task_data.data2
     );
 }
 
 struct AlphaBatchTask {
     vec2 screen_space_origin;
     vec2 render_target_origin;
     vec2 size;
@@ -353,58 +385,64 @@ Glyph fetch_glyph(int specific_prim_addr
     return Glyph(glyph);
 }
 
 struct PrimitiveInstance {
     int prim_address;
     int specific_prim_address;
     int render_task_index;
     int clip_task_index;
-    int layer_index;
+    int scroll_node_id;
+    int clip_node_id;
     int z;
     int user_data0;
     int user_data1;
     int user_data2;
 };
 
 PrimitiveInstance fetch_prim_instance() {
     PrimitiveInstance pi;
 
     pi.prim_address = aData0.x;
     pi.specific_prim_address = pi.prim_address + VECS_PER_PRIM_HEADER;
     pi.render_task_index = aData0.y;
     pi.clip_task_index = aData0.z;
-    pi.layer_index = aData0.w;
+    pi.clip_node_id = aData0.w / 65536;
+    pi.scroll_node_id = aData0.w % 65536;
     pi.z = aData1.x;
     pi.user_data0 = aData1.y;
     pi.user_data1 = aData1.z;
     pi.user_data2 = aData1.w;
 
     return pi;
 }
 
 struct CompositeInstance {
     int render_task_index;
     int src_task_index;
     int backdrop_task_index;
     int user_data0;
     int user_data1;
     float z;
+    int user_data2;
+    int user_data3;
 };
 
 CompositeInstance fetch_composite_instance() {
     CompositeInstance ci;
 
     ci.render_task_index = aData0.x;
     ci.src_task_index = aData0.y;
     ci.backdrop_task_index = aData0.z;
     ci.z = float(aData0.w);
 
     ci.user_data0 = aData1.x;
     ci.user_data1 = aData1.y;
+    ci.user_data2 = aData1.z;
+    ci.user_data3 = aData1.w;
 
     return ci;
 }
 
 struct Primitive {
     Layer layer;
     ClipArea clip_area;
 #ifdef PRIMITIVE_HAS_PICTURE_TASK
@@ -432,17 +470,17 @@ PrimitiveGeometry fetch_primitive_geomet
                              RectWithSize(geom[1].xy, geom[1].zw));
 }
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Primitive prim;
 
-    prim.layer = fetch_layer(pi.layer_index);
+    prim.layer = fetch_layer(pi.clip_node_id, pi.scroll_node_id);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
 #ifdef PRIMITIVE_HAS_PICTURE_TASK
     prim.task = fetch_picture_task(pi.render_task_index);
 #else
     prim.task = fetch_alpha_batch_task(pi.render_task_index);
 #endif
 
     PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
@@ -541,18 +579,17 @@ VertexInfo write_vertex(RectWithSize ins
                         Layer layer,
                         AlphaBatchTask task,
                         RectWithSize snap_rect) {
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
     // Clamp to the two local clip rects.
-    vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect),
-                                        layer.local_clip_rect);
+    vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect), layer.local_clip_rect);
 
     /// Compute the snapping offset.
     vec2 snap_offset = compute_snap_offset(clamped_local_pos, local_clip_rect, layer, snap_rect);
 
     // Transform the current vertex to the world cpace.
     vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
@@ -734,20 +771,19 @@ struct Image {
 };
 
 Image fetch_image(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
     return Image(data[0], data[1]);
 }
 
 void write_clip(vec2 global_pos, ClipArea area) {
-    vec2 texture_size = vec2(textureSize(sSharedCacheA8, 0).xy);
     vec2 uv = global_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
-    vClipMaskUvBounds = area.task_bounds / texture_size.xyxy;
-    vClipMaskUv = vec3(uv / texture_size, area.screen_origin_target_index.z);
+    vClipMaskUvBounds = area.task_bounds;
+    vClipMaskUv = vec3(uv, area.screen_origin_target_index.z);
 }
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
 /// Find the appropriate half range to apply the AA smoothstep over.
 /// This range represents a coefficient to go from one CSS pixel to half a device pixel.
 float compute_aa_range(vec2 position) {
@@ -802,17 +838,17 @@ vec2 init_transform_fs(vec3 local_pos, o
 
 float do_clip() {
     // anything outside of the mask is considered transparent
     bvec4 inside = lessThanEqual(
         vec4(vClipMaskUvBounds.xy, vClipMaskUv.xy),
         vec4(vClipMaskUv.xy, vClipMaskUvBounds.zw));
     // check for the dummy bounds, which are given to the opaque objects
     return vClipMaskUvBounds.xy == vClipMaskUvBounds.zw ? 1.0:
-        all(inside) ? textureLod(sSharedCacheA8, vClipMaskUv, 0.0).r : 0.0;
+        all(inside) ? texelFetch(sSharedCacheA8, ivec3(vClipMaskUv), 0).r : 0.0;
 }
 
 #ifdef WR_FEATURE_DITHERING
 vec4 dither(vec4 color) {
     const int matrix_mask = 7;
 
     ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
     float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0;
--- a/gfx/webrender/res/ps_hardware_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.glsl
@@ -13,17 +13,17 @@ void main(void) {
     AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
     AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
 
     vec2 dest_origin = dest_task.render_target_origin -
                        dest_task.screen_space_origin +
                        vec2(ci.user_data0, ci.user_data1);
 
     vec2 local_pos = mix(dest_origin,
-                         dest_origin + src_task.size,
+                         dest_origin + vec2(ci.user_data2, ci.user_data3),
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vec2 st0 = src_task.render_target_origin;
     vec2 st1 = src_task.render_target_origin + src_task.size;
     vUv = vec3(mix(st0, st1, aPosition.xy) / texture_size, src_task.render_target_layer_index);
     vUvBounds = vec4(st0 + 0.5, st1 - 0.5) / texture_size.xyxy;
 
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -10,22 +10,23 @@ flat varying vec4 vUvBorder;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 
 #define MODE_ALPHA          0
-#define MODE_SUBPX_PASS0    1
-#define MODE_SUBPX_PASS1    2
-#define MODE_SUBPX_BG_PASS0 3
-#define MODE_SUBPX_BG_PASS1 4
-#define MODE_SUBPX_BG_PASS2 5
-#define MODE_COLOR_BITMAP   6
+#define MODE_SUBPX_OPAQUE   1
+#define MODE_SUBPX_PASS0    2
+#define MODE_SUBPX_PASS1    3
+#define MODE_SUBPX_BG_PASS0 4
+#define MODE_SUBPX_BG_PASS1 5
+#define MODE_SUBPX_BG_PASS2 6
+#define MODE_COLOR_BITMAP   7
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
 
@@ -71,16 +72,21 @@ void main(void) {
         case MODE_SUBPX_BG_PASS2:
             vColor = text.color;
             break;
         case MODE_SUBPX_PASS0:
         case MODE_SUBPX_BG_PASS0:
         case MODE_COLOR_BITMAP:
             vColor = vec4(text.color.a);
             break;
+        case MODE_SUBPX_OPAQUE:
+            // The text foreground color is handled by the constant
+            // color blend mode.
+            vColor = vec4(1.0);
+            break;
         case MODE_SUBPX_BG_PASS1:
             // This should never be reached.
             break;
     }
 #endif
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -165,17 +165,16 @@ impl FrameBuilder {
                         };
                     };
 
                     // Construct a mask primitive to add to the picture.
                     let brush_rect = LayerRect::new(LayerPoint::zero(),
                                                     LayerSize::new(width, height));
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
-                        clip_and_scroll,
                         &brush_info,
                         Vec::new(),
                         PrimitiveContainer::Brush(brush_prim),
                     );
 
                     // Create a box shadow picture and add the mask primitive to it.
                     let pic_rect = shadow_rect.inflate(blur_offset, blur_offset);
                     let mut pic_prim = PicturePrimitive::new_box_shadow(
@@ -234,17 +233,16 @@ impl FrameBuilder {
                     let brush_prim = BrushPrimitive {
                         kind: BrushKind::Mask {
                             clip_mode: brush_clip_mode,
                             kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
                         }
                     };
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
-                        clip_and_scroll,
                         &brush_info,
                         Vec::new(),
                         PrimitiveContainer::Brush(brush_prim),
                     );
 
                     // Create a box shadow picture primitive and add
                     // the brush primitive to it.
                     let mut pic_prim = PicturePrimitive::new_box_shadow(
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -16,53 +16,59 @@ use util::{extract_inner_rect_safe, Tran
 const MAX_CLIP: f32 = 1000000.0;
 
 pub type ClipStore = FreeList<ClipSources>;
 pub type ClipSourcesHandle = FreeListHandle<ClipSources>;
 pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipSources>;
 
 #[derive(Clone, Debug)]
 pub struct ClipRegion {
-    pub origin: LayerPoint,
     pub main: LayerRect,
     pub image_mask: Option<ImageMask>,
     pub complex_clips: Vec<ComplexClipRegion>,
 }
 
 impl ClipRegion {
     pub fn create_for_clip_node(
         rect: LayerRect,
         mut complex_clips: Vec<ComplexClipRegion>,
         mut image_mask: Option<ImageMask>,
+        reference_frame_relative_offset: &LayoutVector2D,
     ) -> ClipRegion {
-        // All the coordinates we receive are relative to the stacking context, but we want
-        // to convert them to something relative to the origin of the clip.
-        let negative_origin = -rect.origin.to_vector();
+        let rect = rect.translate(reference_frame_relative_offset);
+
         if let Some(ref mut image_mask) = image_mask {
-            image_mask.rect = image_mask.rect.translate(&negative_origin);
+            image_mask.rect = image_mask.rect.translate(reference_frame_relative_offset);
         }
 
         for complex_clip in complex_clips.iter_mut() {
-            complex_clip.rect = complex_clip.rect.translate(&negative_origin);
+            complex_clip.rect = complex_clip.rect.translate(reference_frame_relative_offset);
         }
 
         ClipRegion {
-            origin: rect.origin,
-            main: LayerRect::new(LayerPoint::zero(), rect.size),
+            main: rect,
             image_mask,
             complex_clips,
         }
     }
 
-    pub fn create_for_clip_node_with_local_clip(local_clip: &LocalClip) -> ClipRegion {
+    pub fn create_for_clip_node_with_local_clip(
+        local_clip: &LocalClip,
+        reference_frame_relative_offset: &LayoutVector2D
+    ) -> ClipRegion {
         let complex_clips = match local_clip {
             &LocalClip::Rect(_) => Vec::new(),
             &LocalClip::RoundedRect(_, ref region) => vec![region.clone()],
         };
-        ClipRegion::create_for_clip_node(*local_clip.clip_rect(), complex_clips, None)
+        ClipRegion::create_for_clip_node(
+            *local_clip.clip_rect(),
+            complex_clips,
+            None,
+            reference_frame_relative_offset
+        )
     }
 }
 
 #[derive(Debug)]
 pub enum ClipSource {
     Rectangle(LayerRect),
     RoundedRectangle(LayerRect, BorderRadius, ClipMode),
     Image(ImageMask),
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -1,82 +1,81 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntRect, LayerPixel, LayerPoint, LayerRect, LayerSize};
-use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, PipelineId};
-use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity, StickyOffsetBounds};
-use api::WorldPoint;
+use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, LayoutVector2D, PipelineId};
+use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity};
+use api::{StickyOffsetBounds, WorldPoint};
 use clip::{ClipRegion, ClipSources, ClipSourcesHandle, ClipStore};
 use clip_scroll_tree::{CoordinateSystemId, TransformUpdateState};
 use euclid::SideOffsets2D;
 use geometry::ray_intersects_rect;
 use gpu_cache::GpuCache;
+use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use render_task::{ClipChain, ClipChainNode, ClipWorkItem};
 use resource_cache::ResourceCache;
 use spring::{DAMPING, STIFFNESS, Spring};
 use std::rc::Rc;
-use tiling::{PackedLayer, PackedLayerIndex};
 use util::{MatrixHelpers, MaxRect};
 
 #[cfg(target_os = "macos")]
 const CAN_OVERSCROLL: bool = true;
 
 #[cfg(not(target_os = "macos"))]
 const CAN_OVERSCROLL: bool = false;
 
+const MAX_LOCAL_VIEWPORT: f32 = 1000000.0;
+
 #[derive(Debug)]
 pub struct ClipInfo {
     /// The clips for this node.
     pub clip_sources: ClipSourcesHandle,
 
-    /// The packed layer index for this node, which is used to render a clip mask
-    /// for it, if necessary.
-    pub packed_layer_index: PackedLayerIndex,
-
     /// Whether or not this clip node automatically creates a mask.
     pub is_masking: bool,
 }
 
 impl ClipInfo {
     pub fn new(
         clip_region: ClipRegion,
-        packed_layer_index: PackedLayerIndex,
         clip_store: &mut ClipStore,
     ) -> ClipInfo {
         let clip_sources = ClipSources::from(clip_region);
         let is_masking = clip_sources.is_masking();
 
         ClipInfo {
             clip_sources: clip_store.insert(clip_sources),
-            packed_layer_index,
             is_masking,
         }
     }
 }
 
 #[derive(Debug)]
 pub struct StickyFrameInfo {
     pub margins: SideOffsets2D<Option<f32>>,
     pub vertical_offset_bounds: StickyOffsetBounds,
     pub horizontal_offset_bounds: StickyOffsetBounds,
+    pub previously_applied_offset: LayoutVector2D,
     pub current_offset: LayerVector2D,
 }
 
 impl StickyFrameInfo {
     pub fn new(
         margins: SideOffsets2D<Option<f32>>,
         vertical_offset_bounds: StickyOffsetBounds,
-        horizontal_offset_bounds: StickyOffsetBounds
+        horizontal_offset_bounds: StickyOffsetBounds,
+        previously_applied_offset: LayoutVector2D
     ) -> StickyFrameInfo {
         StickyFrameInfo {
             margins,
             vertical_offset_bounds,
             horizontal_offset_bounds,
+            previously_applied_offset,
             current_offset: LayerVector2D::zero(),
         }
     }
 }
 
 #[derive(Debug)]
 pub enum NodeType {
     /// A reference frame establishes a new coordinate space in the tree.
@@ -105,17 +104,18 @@ pub struct ClipScrollNode {
     /// Clip rect of this node - typically the same as viewport rect, except
     /// in overscroll cases.
     pub local_clip_rect: LayerRect,
 
     /// Viewport rectangle clipped against parent layer(s) viewport rectangles.
     /// This is in the coordinate system of the node origin.
     /// Precisely, it combines the local clipping rectangles of all the parent
     /// nodes on the way to the root, including those of `ClipRegion` rectangles.
-    /// The combined clip is lossy/concervative on `ReferenceFrame` nodes.
+    /// The combined clip is reset to maximum when an incompatible coordinate
+    /// system is encountered.
     pub combined_local_viewport_rect: LayerRect,
 
     /// World transform for the viewport rect itself. This is the parent
     /// reference frame transformation plus the scrolling offsets provided by
     /// the nodes in between the reference frame and this node.
     pub world_viewport_transform: LayerToWorldTransform,
 
     /// World transform for content transformed by this node.
@@ -143,16 +143,20 @@ pub struct ClipScrollNode {
     /// generate clip tasks.
     pub clip_chain_node: ClipChain,
 
     /// The intersected outer bounds of the clips for this node.
     pub combined_clip_outer_bounds: DeviceIntRect,
 
     /// The axis-aligned coordinate system id of this node.
     pub coordinate_system_id: CoordinateSystemId,
+
+    /// A linear ID / index of this clip-scroll node. Used as a reference to
+    /// pass to shaders, to allow them to fetch a given clip-scroll node.
+    pub id: ClipScrollNodeIndex,
 }
 
 impl ClipScrollNode {
     fn new(
         pipeline_id: PipelineId,
         parent_id: Option<ClipId>,
         rect: &LayerRect,
         node_type: NodeType
@@ -166,16 +170,17 @@ impl ClipScrollNode {
             reference_frame_relative_scroll_offset: LayerVector2D::zero(),
             parent: parent_id,
             children: Vec::new(),
             pipeline_id,
             node_type: node_type,
             clip_chain_node: None,
             combined_clip_outer_bounds: DeviceIntRect::max_rect(),
             coordinate_system_id: CoordinateSystemId(0),
+            id: ClipScrollNodeIndex(0),
         }
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         frame_rect: &LayerRect,
         content_size: &LayerSize,
@@ -282,86 +287,66 @@ impl ClipScrollNode {
         scrolling.bouncing_back = false;
         scrolling.started_bouncing_back = false;
         true
     }
 
     pub fn update_clip_work_item(
         &mut self,
         state: &mut TransformUpdateState,
-        screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32,
-        packed_layers: &mut Vec<PackedLayer>,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
     ) {
-        self.coordinate_system_id = state.current_coordinate_system_id;
-
         let current_clip_chain = state.parent_clip_chain.clone();
         let clip_info = match self.node_type {
             NodeType::Clip(ref mut info) if info.is_masking => info,
             _ => {
                 self.clip_chain_node = current_clip_chain;
                 self.combined_clip_outer_bounds = state.combined_outer_clip_bounds;
                 return;
             }
         };
 
-        // The coordinates of the mask are relative to the origin of the node itself,
-        // so we need to account for that origin in the transformation we assign to
-        // the packed layer.
-        let transform = self.world_viewport_transform
-            .pre_translate(self.local_viewport_rect.origin.to_vector().to_3d());
-
-        let packed_layer = &mut packed_layers[clip_info.packed_layer_index.0];
-        if packed_layer.set_transform(transform) {
-            // Meanwhile, the combined viewport rect is relative to the reference frame, so
-            // we move it into the local coordinate system of the node.
-            let local_viewport_rect = self.combined_local_viewport_rect
-                .translate(&-self.local_viewport_rect.origin.to_vector());
-
-            packed_layer.set_rect(
-                &local_viewport_rect,
-                screen_rect,
-                device_pixel_ratio,
-            );
-        }
-
         let clip_sources = clip_store.get_mut(&clip_info.clip_sources);
         clip_sources.update(
-            &transform,
+            &self.world_viewport_transform,
             gpu_cache,
             resource_cache,
             device_pixel_ratio,
         );
 
         let outer_bounds = clip_sources.bounds.outer.as_ref().map_or_else(
             DeviceIntRect::zero,
             |rect| rect.device_rect
         );
 
         self.combined_clip_outer_bounds = outer_bounds.intersection(
             &state.combined_outer_clip_bounds).unwrap_or_else(DeviceIntRect::zero);
 
         // TODO: Combine rectangles in the same axis-aligned clip space here?
         self.clip_chain_node = Some(Rc::new(ClipChainNode {
             work_item: ClipWorkItem {
-                layer_index: clip_info.packed_layer_index,
+                scroll_node_id: self.id,
                 clip_sources: clip_info.clip_sources.weak(),
                 coordinate_system_id: state.current_coordinate_system_id,
             },
             prev: current_clip_chain,
         }));
 
         state.combined_outer_clip_bounds = self.combined_clip_outer_bounds;
         state.parent_clip_chain = self.clip_chain_node.clone();
     }
 
-    pub fn update_transform(&mut self, state: &mut TransformUpdateState) {
+    pub fn update_transform(
+        &mut self,
+        state: &mut TransformUpdateState,
+        node_data: &mut Vec<ClipScrollNodeData>,
+    ) {
         // We calculate this here to avoid a double-borrow later.
         let sticky_offset = self.calculate_sticky_offset(
             &state.nearest_scrolling_ancestor_offset,
             &state.nearest_scrolling_ancestor_viewport,
         );
 
         let (local_transform, accumulated_scroll_offset) = match self.node_type {
             NodeType::ReferenceFrame(ref info) => {
@@ -447,16 +432,49 @@ impl ClipScrollNode {
                 // We don't translate the combined rect by the sticky offset, because sticky
                 // offsets actually adjust the node position itself, whereas scroll offsets
                 // only apply to contents inside the node.
                 state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
                 state.parent_accumulated_scroll_offset =
                     info.current_offset + state.parent_accumulated_scroll_offset;
             }
         }
+
+        // Store coord system ID, and also the ID used for shaders to reference this node.
+        self.coordinate_system_id = state.current_coordinate_system_id;
+        self.id = ClipScrollNodeIndex(node_data.len() as u32);
+
+        let local_clip_rect = if self.world_content_transform.has_perspective_component() {
+            LayerRect::new(
+                LayerPoint::new(-MAX_LOCAL_VIEWPORT, -MAX_LOCAL_VIEWPORT),
+                LayerSize::new(2.0 * MAX_LOCAL_VIEWPORT, 2.0 * MAX_LOCAL_VIEWPORT)
+            )
+        } else {
+            self.combined_local_viewport_rect
+        };
+
+        let data = match self.world_content_transform.inverse() {
+            Some(inverse) => {
+                ClipScrollNodeData {
+                    transform: self.world_content_transform,
+                    inv_transform: inverse,
+                    local_clip_rect,
+                    reference_frame_relative_scroll_offset: self.reference_frame_relative_scroll_offset,
+                    scroll_offset: self.scroll_offset(),
+                }
+            }
+            None => {
+                state.combined_outer_clip_bounds = DeviceIntRect::zero();
+
+                ClipScrollNodeData::invalid()
+            }
+        };
+
+        // Write the data that will be made available to the GPU for this node.
+        node_data.push(data);
     }
 
     fn calculate_sticky_offset(
         &self,
         viewport_scroll_offset: &LayerVector2D,
         viewport_rect: &LayerRect,
     ) -> LayerVector2D {
         let info = match self.node_type {
@@ -473,64 +491,97 @@ impl ClipScrollNode {
         // be offset in order to keep it on screen. Since we care about the relationship
         // between the scrolled content and unscrolled viewport we adjust the viewport's
         // position by the scroll offset in order to work with their relative positions on the
         // page.
         let sticky_rect = self.local_viewport_rect.translate(viewport_scroll_offset);
 
         let mut sticky_offset = LayerVector2D::zero();
         if let Some(margin) = info.margins.top {
-            // If the sticky rect is positioned above the top edge of the viewport (plus margin)
-            // we move it down so that it is fully inside the viewport.
             let top_viewport_edge = viewport_rect.min_y() + margin;
             if sticky_rect.min_y() < top_viewport_edge {
-                 sticky_offset.y = top_viewport_edge - sticky_rect.min_y();
+                // If the sticky rect is positioned above the top edge of the viewport (plus margin)
+                // we move it down so that it is fully inside the viewport.
+                sticky_offset.y = top_viewport_edge - sticky_rect.min_y();
+            } else if info.previously_applied_offset.y > 0.0 &&
+                sticky_rect.min_y() > top_viewport_edge {
+                // However, if the sticky rect is positioned *below* the top edge of the viewport
+                // and there is already some offset applied to the sticky rect's position, then
+                // we need to move it up so that it remains at the correct position. This
+                // makes sticky_offset.y negative and effectively reduces the amount of the
+                // offset that was already applied. We limit the reduction so that it can, at most,
+                // cancel out the already-applied offset, but should never end up adjusting the
+                // position the other way.
+                sticky_offset.y = top_viewport_edge - sticky_rect.min_y();
+                sticky_offset.y = sticky_offset.y.max(-info.previously_applied_offset.y);
             }
-            debug_assert!(sticky_offset.y >= 0.0);
+            debug_assert!(sticky_offset.y + info.previously_applied_offset.y >= 0.0);
         }
 
-        if sticky_offset.y == 0.0 {
+        // If we don't have a sticky-top offset (sticky_offset.y + info.previously_applied_offset.y
+        // == 0), or if we have a previously-applied bottom offset (previously_applied_offset.y < 0)
+        // then we check for handling the bottom margin case.
+        if sticky_offset.y + info.previously_applied_offset.y <= 0.0 {
             if let Some(margin) = info.margins.bottom {
-                // If the bottom of the sticky rect is positioned below the bottom viewport edge
-                // (accounting for margin), we move it up so that it is fully inside the viewport.
+                // Same as the above case, but inverted for bottom-sticky items. Here
+                // we adjust items upwards, resulting in a negative sticky_offset.y,
+                // or reduce the already-present upward adjustment, resulting in a positive
+                // sticky_offset.y.
                 let bottom_viewport_edge = viewport_rect.max_y() - margin;
                 if sticky_rect.max_y() > bottom_viewport_edge {
-                     sticky_offset.y = bottom_viewport_edge - sticky_rect.max_y();
+                    sticky_offset.y = bottom_viewport_edge - sticky_rect.max_y();
+                } else if info.previously_applied_offset.y < 0.0 &&
+                    sticky_rect.max_y() < bottom_viewport_edge {
+                    sticky_offset.y = bottom_viewport_edge - sticky_rect.max_y();
+                    sticky_offset.y = sticky_offset.y.min(-info.previously_applied_offset.y);
                 }
-                debug_assert!(sticky_offset.y <= 0.0);
+                debug_assert!(sticky_offset.y + info.previously_applied_offset.y <= 0.0);
             }
         }
 
+        // Same as above, but for the x-axis.
         if let Some(margin) = info.margins.left {
-            // If the sticky rect is positioned left of the left edge of the viewport (plus margin)
-            // we move it right so that it is fully inside the viewport.
             let left_viewport_edge = viewport_rect.min_x() + margin;
             if sticky_rect.min_x() < left_viewport_edge {
-                 sticky_offset.x = left_viewport_edge - sticky_rect.min_x();
+                sticky_offset.x = left_viewport_edge - sticky_rect.min_x();
+            } else if info.previously_applied_offset.x > 0.0 &&
+                sticky_rect.min_x() > left_viewport_edge {
+                sticky_offset.x = left_viewport_edge - sticky_rect.min_x();
+                sticky_offset.x = sticky_offset.x.max(-info.previously_applied_offset.x);
             }
-            debug_assert!(sticky_offset.x >= 0.0);
+            debug_assert!(sticky_offset.x + info.previously_applied_offset.x >= 0.0);
         }
 
-        if sticky_offset.x == 0.0 {
+        if sticky_offset.x + info.previously_applied_offset.x <= 0.0 {
             if let Some(margin) = info.margins.right {
-                // If the right edge of the sticky rect is positioned right of the right viewport
-                // edge (accounting for margin), we move it left so that it is fully inside the
-                // viewport.
                 let right_viewport_edge = viewport_rect.max_x() - margin;
                 if sticky_rect.max_x() > right_viewport_edge {
-                     sticky_offset.x = right_viewport_edge - sticky_rect.max_x();
+                    sticky_offset.x = right_viewport_edge - sticky_rect.max_x();
+                } else if info.previously_applied_offset.x < 0.0 &&
+                    sticky_rect.max_x() < right_viewport_edge {
+                    sticky_offset.x = right_viewport_edge - sticky_rect.max_x();
+                    sticky_offset.x = sticky_offset.x.min(-info.previously_applied_offset.x);
                 }
-                debug_assert!(sticky_offset.x <= 0.0);
+                debug_assert!(sticky_offset.x + info.previously_applied_offset.x <= 0.0);
             }
         }
 
-        sticky_offset.y = sticky_offset.y.max(info.vertical_offset_bounds.min);
-        sticky_offset.y = sticky_offset.y.min(info.vertical_offset_bounds.max);
-        sticky_offset.x = sticky_offset.x.max(info.horizontal_offset_bounds.min);
-        sticky_offset.x = sticky_offset.x.min(info.horizontal_offset_bounds.max);
+        // The total "sticky offset" (which is the sum that was already applied by
+        // the calling code, stored in info.previously_applied_offset, and the extra amount we
+        // computed as a result of scrolling, stored in sticky_offset) needs to be
+        // clamped to the provided bounds.
+        let clamp_adjusted = |value: f32, adjust: f32, bounds: &StickyOffsetBounds| {
+            (value + adjust).max(bounds.min).min(bounds.max) - adjust
+        };
+        sticky_offset.y = clamp_adjusted(sticky_offset.y,
+                                         info.previously_applied_offset.y,
+                                         &info.vertical_offset_bounds);
+        sticky_offset.x = clamp_adjusted(sticky_offset.x,
+                                         info.previously_applied_offset.x,
+                                         &info.horizontal_offset_bounds);
 
         sticky_offset
     }
 
     pub fn scrollable_size(&self) -> LayerSize {
         match self.node_type {
            NodeType:: ScrollFrame(state) => state.scrollable_size,
             _ => LayerSize::zero(),
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -3,21 +3,21 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect, LayerToScrollTransform};
 use api::{LayerToWorldTransform, LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase};
 use api::{ScrollLayerState, ScrollLocation, WorldPoint};
 use clip::ClipStore;
 use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState, StickyFrameInfo};
 use gpu_cache::GpuCache;
+use gpu_types::ClipScrollNodeData;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use render_task::ClipChain;
 use resource_cache::ResourceCache;
-use tiling::PackedLayer;
 
 pub type ScrollStates = FastHashMap<ClipId, ScrollingState>;
 
 /// An id that identifies coordinate systems in the ClipScrollTree. Each
 /// coordinate system has an id and those ids will be shared when the coordinates
 /// system are the same or are in the same axis-aligned space. This allows
 /// for optimizing mask generation.
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -324,21 +324,21 @@ impl ClipScrollTree {
             .unwrap()
             .scroll(scroll_location, phase)
     }
 
     pub fn update_all_node_transforms(
         &mut self,
         screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32,
-        packed_layers: &mut Vec<PackedLayer>,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pan: LayerPoint,
+        node_data: &mut Vec<ClipScrollNodeData>,
     ) {
         if self.nodes.is_empty() {
             return;
         }
 
         let root_reference_frame_id = self.root_reference_frame_id();
         let root_viewport = self.nodes[&root_reference_frame_id].local_clip_rect;
 
@@ -355,69 +355,67 @@ impl ClipScrollTree {
             parent_clip_chain: None,
             combined_outer_clip_bounds: *screen_rect,
             current_coordinate_system_id: CoordinateSystemId(0),
             next_coordinate_system_id: CoordinateSystemId(0).next(),
         };
         self.update_node_transform(
             root_reference_frame_id,
             &mut state,
-            &screen_rect,
             device_pixel_ratio,
-            packed_layers,
             clip_store,
             resource_cache,
             gpu_cache,
+            node_data,
         );
     }
 
     fn update_node_transform(
         &mut self,
         layer_id: ClipId,
         state: &mut TransformUpdateState,
-        screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32,
-        packed_layers: &mut Vec<PackedLayer>,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
+        node_data: &mut Vec<ClipScrollNodeData>,
     ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
         //           Restructure this to avoid the clones!
         let mut state = state.clone();
         let node_children = {
             let node = match self.nodes.get_mut(&layer_id) {
                 Some(node) => node,
                 None => return,
             };
 
-            node.update_transform(&mut state);
+            node.update_transform(
+                &mut state,
+                node_data
+            );
             node.update_clip_work_item(
                 &mut state,
-                screen_rect,
                 device_pixel_ratio,
-                packed_layers,
                 clip_store,
                 resource_cache,
                 gpu_cache,
             );
 
             node.children.clone()
         };
 
         for child_layer_id in node_children {
             self.update_node_transform(
                 child_layer_id,
                 &mut state,
-                screen_rect,
                 device_pixel_ratio,
-                packed_layers,
                 clip_store,
                 resource_cache,
                 gpu_cache,
+                node_data,
             );
         }
     }
 
     pub fn tick_scrolling_bounce_animations(&mut self) {
         for (_, node) in &mut self.nodes {
             node.tick_scrolling_bounce_animation()
         }
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
-use api::ImageFormat;
+use api::{ColorF, ImageFormat};
 use api::{DeviceIntRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::RenderTargetMode;
 use std::fs::File;
 use std::io::Read;
 use std::iter::repeat;
 use std::mem;
@@ -1920,16 +1920,22 @@ impl Device {
     pub fn set_blend_mode_subpixel_with_bg_color_pass1(&self) {
         self.gl.blend_func_separate(gl::ONE_MINUS_DST_ALPHA, gl::ONE, gl::ZERO, gl::ONE);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
     pub fn set_blend_mode_subpixel_with_bg_color_pass2(&self) {
         self.gl.blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
+    pub fn set_blend_mode_subpixel_opaque(&self, color: ColorF) {
+        self.gl.blend_color(color.r, color.g, color.b, color.a);
+        self.gl
+            .blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
 }
 
 /// return (gl_internal_format, gl_format)
 fn gl_texture_formats_for_image_format(
     gl: &gl::Gl,
     format: ImageFormat,
 ) -> (gl::GLint, gl::GLuint) {
     match format {
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -317,18 +317,20 @@ impl<'a> FlattenContext<'a> {
         local_clip: &LocalClip,
         reference_frame_relative_offset: LayerVector2D,
     ) {
         let pipeline = match self.scene.pipelines.get(&pipeline_id) {
             Some(pipeline) => pipeline,
             None => return,
         };
 
-        let mut clip_region = ClipRegion::create_for_clip_node_with_local_clip(local_clip);
-        clip_region.origin += reference_frame_relative_offset;
+        let clip_region = ClipRegion::create_for_clip_node_with_local_clip(
+            local_clip,
+            &reference_frame_relative_offset
+        );
         let parent_pipeline_id = parent_id.pipeline_id();
         let clip_id = self.clip_scroll_tree
             .generate_new_clip_id(parent_pipeline_id);
         self.builder.add_clip_node(
             clip_id,
             parent_id,
             parent_pipeline_id,
             clip_region,
@@ -547,39 +549,37 @@ impl<'a> FlattenContext<'a> {
                     clip_and_scroll.scroll_node_id,
                     &item.rect(),
                     &item.local_clip(),
                     reference_frame_relative_offset,
                 );
             }
             SpecificDisplayItem::Clip(ref info) => {
                 let complex_clips = self.get_complex_clips(pipeline_id, item.complex_clip().0);
-                let mut clip_region = ClipRegion::create_for_clip_node(
+                let clip_region = ClipRegion::create_for_clip_node(
                     *item.local_clip().clip_rect(),
                     complex_clips,
                     info.image_mask,
+                    &reference_frame_relative_offset,
                 );
-                clip_region.origin += reference_frame_relative_offset;
-
                 self.flatten_clip(
                     pipeline_id,
                     &clip_and_scroll.scroll_node_id,
                     &info.id,
                     clip_region,
                 );
             }
             SpecificDisplayItem::ScrollFrame(ref info) => {
                 let complex_clips = self.get_complex_clips(pipeline_id, item.complex_clip().0);
-                let mut clip_region = ClipRegion::create_for_clip_node(
+                let clip_region = ClipRegion::create_for_clip_node(
                     *item.local_clip().clip_rect(),
                     complex_clips,
                     info.image_mask,
+                    &reference_frame_relative_offset,
                 );
-                clip_region.origin += reference_frame_relative_offset;
-
                 // Just use clip rectangle as the frame rect for this scroll frame.
                 // This is useful when calculating scroll extents for the
                 // ClipScrollNode::scroll(..) API as well as for properly setting sticky
                 // positioning offsets.
                 let frame_rect = item.local_clip()
                     .clip_rect()
                     .translate(&reference_frame_relative_offset);
                 let content_rect = item.rect().translate(&reference_frame_relative_offset);
@@ -594,16 +594,17 @@ impl<'a> FlattenContext<'a> {
                 );
             }
             SpecificDisplayItem::StickyFrame(ref info) => {
                 let frame_rect = item.rect().translate(&reference_frame_relative_offset);
                 let sticky_frame_info = StickyFrameInfo::new(
                     info.margins,
                     info.vertical_offset_bounds,
                     info.horizontal_offset_bounds,
+                    info.previously_applied_offset,
                 );
                 self.clip_scroll_tree.add_sticky_frame(
                     info.id,
                     clip_and_scroll.scroll_node_id, /* parent id */
                     frame_rect,
                     sticky_frame_info
                 );
             }
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -11,37 +11,37 @@ use api::{ImageKey, ImageRendering, Item
 use api::{LayerPixel, LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
 use api::{WorldPixel, WorldPoint, YuvColorSpace, YuvData, device_length};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
-use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
+use clip_scroll_tree::{ClipScrollTree};
 use euclid::{SideOffsets2D, TypedTransform3D, vec2, vec3};
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
 use picture::{PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
-use render_task::{AlphaRenderItem, ClearMode, ClipChain, RenderTask, RenderTaskId, RenderTaskLocation};
+use render_task::{AlphaRenderItem, ClearMode, RenderTask, RenderTaskId, RenderTaskLocation};
 use render_task::RenderTaskTree;
 use resource_cache::ResourceCache;
 use scene::ScenePipeline;
 use std::{mem, usize, f32, i32};
-use tiling::{ClipScrollGroup, ClipScrollGroupIndex, CompositeOps, Frame};
+use tiling::{CompositeOps, Frame};
 use tiling::{ContextIsolation, RenderTargetKind, StackingContextIndex};
-use tiling::{PackedLayer, PackedLayerIndex, PrimitiveFlags, PrimitiveRunCmd, RenderPass};
+use tiling::{PrimitiveFlags, PrimitiveRunCmd, RenderPass};
 use tiling::{RenderTargetContext, ScrollbarPrimitive, StackingContext};
 use util::{self, pack_as_float, RectHelpers, recycle_vec};
 use box_shadow::BLUR_SAMPLE_SCALE;
 
 /// Construct a polygon from stacking context boundaries.
 /// `anchor` here is an index that's going to be preserved in all the
 /// splits of the polygon.
 fn make_polygon(
@@ -108,21 +108,16 @@ pub struct FrameBuilder {
     background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     pub clip_store: ClipStore,
     cmds: Vec<PrimitiveRunCmd>,
     hit_testing_runs: Vec<HitTestingRun>,
     pub config: FrameBuilderConfig,
 
     stacking_context_store: Vec<StackingContext>,
-    clip_scroll_group_store: Vec<ClipScrollGroup>,
-    // Note: value here is meant to be `ClipScrollGroupIndex`,
-    // but we already have `ClipAndScrollInfo` in the key
-    clip_scroll_group_indices: FastHashMap<ClipAndScrollInfo, usize>,
-    packed_layers: Vec<PackedLayer>,
 
     // A stack of the current shadow primitives.
     // The sub-Vec stores a buffer of fast-path primitives to be appended on pop.
     shadow_prim_stack: Vec<(PrimitiveIndex, Vec<(PrimitiveIndex, ClipAndScrollInfo)>)>,
     // If we're doing any fast-path shadows, we buffer the "real"
     // content here, to be appended when the shadow stack is empty.
     pending_shadow_contents: Vec<(PrimitiveIndex, ClipAndScrollInfo, LayerPrimitiveInfo)>,
 
@@ -136,84 +131,66 @@ pub struct FrameBuilder {
     /// primitives are added to the frame.
     stacking_context_stack: Vec<StackingContextIndex>,
 
     /// Whether or not we've pushed a root stacking context for the current pipeline.
     has_root_stacking_context: bool,
 }
 
 pub struct PrimitiveContext<'a> {
-    pub packed_layer_index: PackedLayerIndex,
-    pub packed_layer: &'a PackedLayer,
     pub device_pixel_ratio: f32,
-    pub clip_chain: ClipChain,
-    pub clip_bounds: DeviceIntRect,
-    pub clip_id: ClipId,
-    pub coordinate_system_id: CoordinateSystemId,
     pub display_list: &'a BuiltDisplayList,
+    pub clip_node: &'a ClipScrollNode,
+    pub scroll_node: &'a ClipScrollNode,
 }
 
 impl<'a> PrimitiveContext<'a> {
     fn new(
-        packed_layer_index: PackedLayerIndex,
-        packed_layer: &'a PackedLayer,
-        clip_id: ClipId,
-        clip_chain: ClipChain,
-        clip_bounds: DeviceIntRect,
-        coordinate_system_id: CoordinateSystemId,
         device_pixel_ratio: f32,
         display_list: &'a BuiltDisplayList,
+        clip_node: &'a ClipScrollNode,
+        scroll_node: &'a ClipScrollNode,
     ) -> Self {
         PrimitiveContext {
-            packed_layer_index,
-            packed_layer,
-            clip_chain,
-            clip_bounds,
-            coordinate_system_id,
             device_pixel_ratio,
-            clip_id,
             display_list,
+            clip_node,
+            scroll_node,
         }
     }
 }
 
 impl FrameBuilder {
     pub fn new(
         previous: Option<Self>,
         screen_size: DeviceUintSize,
         background_color: Option<ColorF>,
         config: FrameBuilderConfig,
     ) -> Self {
         match previous {
             Some(prev) => FrameBuilder {
                 stacking_context_store: recycle_vec(prev.stacking_context_store),
-                clip_scroll_group_store: recycle_vec(prev.clip_scroll_group_store),
-                clip_scroll_group_indices: FastHashMap::default(),
                 cmds: recycle_vec(prev.cmds),
                 hit_testing_runs: recycle_vec(prev.hit_testing_runs),
-                packed_layers: recycle_vec(prev.packed_layers),
                 shadow_prim_stack: recycle_vec(prev.shadow_prim_stack),
                 pending_shadow_contents: recycle_vec(prev.pending_shadow_contents),
                 scrollbar_prims: recycle_vec(prev.scrollbar_prims),
                 reference_frame_stack: recycle_vec(prev.reference_frame_stack),
                 stacking_context_stack: recycle_vec(prev.stacking_context_stack),
                 prim_store: prev.prim_store.recycle(),
                 clip_store: prev.clip_store.recycle(),
                 screen_size,
                 background_color,
                 config,
                 has_root_stacking_context: false,
             },
             None => FrameBuilder {
                 stacking_context_store: Vec::new(),
-                clip_scroll_group_store: Vec::new(),
-                clip_scroll_group_indices: FastHashMap::default(),
                 cmds: Vec::new(),
                 hit_testing_runs: Vec::new(),
-                packed_layers: Vec::new(),
                 shadow_prim_stack: Vec::new(),
                 pending_shadow_contents: Vec::new(),
                 scrollbar_prims: Vec::new(),
                 reference_frame_stack: Vec::new(),
                 stacking_context_stack: Vec::new(),
                 prim_store: PrimitiveStore::new(),
                 clip_store: ClipStore::new(),
                 screen_size,
@@ -224,26 +201,20 @@ impl FrameBuilder {
         }
     }
 
     /// Create a primitive and add it to the prim store. This method doesn't
     /// add the primitive to the draw list, so can be used for creating
     /// sub-primitives.
     pub fn create_primitive(
         &mut self,
-        clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         mut clip_sources: Vec<ClipSource>,
         container: PrimitiveContainer,
     ) -> PrimitiveIndex {
-        if !self.clip_scroll_group_indices.contains_key(&clip_and_scroll) {
-            let group_id = self.create_clip_scroll_group(&clip_and_scroll);
-            self.clip_scroll_group_indices.insert(clip_and_scroll, group_id);
-        }
-
         if let &LocalClip::RoundedRect(main, region) = &info.local_clip {
             clip_sources.push(ClipSource::Rectangle(main));
             clip_sources.push(ClipSource::RoundedRectangle(
                 region.rect,
                 region.radii,
                 region.mode,
             ));
         }
@@ -317,38 +288,22 @@ impl FrameBuilder {
     pub fn add_primitive(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         clip_sources: Vec<ClipSource>,
         container: PrimitiveContainer,
     ) -> PrimitiveIndex {
         self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
-        let prim_index = self.create_primitive(clip_and_scroll, info, clip_sources, container);
+        let prim_index = self.create_primitive(info, clip_sources, container);
 
         self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
         prim_index
     }
 
-    fn create_clip_scroll_group(&mut self, info: &ClipAndScrollInfo) -> usize {
-        let packed_layer_index = PackedLayerIndex(self.packed_layers.len());
-        self.packed_layers.push(PackedLayer::empty());
-
-        let group_id = self.clip_scroll_group_store.len();
-        self.clip_scroll_group_store.push(ClipScrollGroup {
-            scroll_node_id: info.scroll_node_id,
-            clip_node_id: info.clip_node_id(),
-            packed_layer_index,
-            screen_bounding_rect: None,
-            coordinate_system_id: CoordinateSystemId(0),
-        });
-
-        group_id
-    }
-
     pub fn notify_waiting_for_root_stacking_context(&mut self) {
         self.has_root_stacking_context = false;
     }
 
     pub fn push_stacking_context(
         &mut self,
         reference_frame_offset: &LayerVector2D,
         pipeline_id: PipelineId,
@@ -512,25 +467,23 @@ impl FrameBuilder {
     pub fn add_clip_node(
         &mut self,
         new_node_id: ClipId,
         parent_id: ClipId,
         pipeline_id: PipelineId,
         clip_region: ClipRegion,
         clip_scroll_tree: &mut ClipScrollTree,
     ) {
-        let clip_rect = LayerRect::new(clip_region.origin, clip_region.main.size);
+        let clip_rect = clip_region.main;
         let clip_info = ClipInfo::new(
             clip_region,
-            PackedLayerIndex(self.packed_layers.len()),
             &mut self.clip_store,
         );
         let node = ClipScrollNode::new_clip_node(pipeline_id, parent_id, clip_info, clip_rect);
         clip_scroll_tree.add_node(node, new_node_id);
-        self.packed_layers.push(PackedLayer::empty());
     }
 
     pub fn add_scroll_frame(
         &mut self,
         new_node_id: ClipId,
         parent_id: ClipId,
         pipeline_id: PipelineId,
         frame_rect: &LayerRect,
@@ -561,17 +514,16 @@ impl FrameBuilder {
     ) {
         let prim = PicturePrimitive::new_text_shadow(shadow);
 
         // Create an empty shadow primitive. Insert it into
         // the draw lists immediately so that it will be drawn
         // before any visual text elements that are added as
         // part of this shadow context.
         let prim_index = self.create_primitive(
-            clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Picture(prim),
         );
 
         let pending = vec![(prim_index, clip_and_scroll)];
         self.shadow_prim_stack.push((prim_index, pending));
     }
@@ -671,26 +623,24 @@ impl FrameBuilder {
         }
 
         for (idx, shadow) in fast_shadow_prims {
             let mut line = line.clone();
             line.color = shadow.color;
             let mut info = info.clone();
             info.rect = info.rect.translate(&shadow.offset);
             let prim_index = self.create_primitive(
-                clip_and_scroll,
                 &info,
                 Vec::new(),
                 PrimitiveContainer::Line(line),
             );
             self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
         }
 
         let prim_index = self.create_primitive(
-            clip_and_scroll,
             &info,
             Vec::new(),
             PrimitiveContainer::Line(line),
         );
 
         if color.a > 0.0 {
             if self.shadow_prim_stack.is_empty() {
                 self.add_primitive_to_hit_testing_list(&info, clip_and_scroll);
@@ -1178,28 +1128,26 @@ impl FrameBuilder {
             }
         }
 
         for (idx, text_prim) in fast_shadow_prims {
             let rect = info.rect;
             let mut info = info.clone();
             info.rect = rect.translate(&text_prim.offset);
             let prim_index = self.create_primitive(
-                clip_and_scroll,
                 &info,
                 Vec::new(),
                 PrimitiveContainer::TextRun(text_prim),
             );
             self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
         }
 
         // Create (and add to primitive store) the primitive that will be
         // used for both the visual element and also the shadow(s).
         let prim_index = self.create_primitive(
-            clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::TextRun(prim),
         );
 
         // Only add a visual element if it can contribute to the scene.
         if color.a > 0.0 {
             if self.shadow_prim_stack.is_empty() {
@@ -1312,29 +1260,16 @@ impl FrameBuilder {
         // the initial adding of items for the common case (where there is only a single
         // scroll layer for items in a stacking context).
         let stacking_context =
             &mut self.stacking_context_store[stacking_context_index.0];
         stacking_context.screen_bounds = DeviceIntRect::zero();
         stacking_context.isolated_items_bounds = LayerRect::zero();
     }
 
-    pub fn get_packed_layer_index_if_visible(
-        &self,
-        clip_and_scroll: &ClipAndScrollInfo
-    ) -> Option<PackedLayerIndex> {
-        let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
-        let clip_scroll_group = &self.clip_scroll_group_store[group_id];
-        if clip_scroll_group.is_visible() {
-            Some(clip_scroll_group.packed_layer_index)
-        } else {
-            None
-        }
-    }
-
     pub fn hit_test(
         &self,
         clip_scroll_tree: &ClipScrollTree,
         pipeline_id: Option<PipelineId>,
         point: WorldPoint,
         flags: HitTestFlags
     ) -> HitTestResult {
         let point = if flags.contains(HitTestFlags::POINT_RELATIVE_TO_PIPELINE_VIEWPORT) {
@@ -1409,88 +1344,56 @@ impl FrameBuilder {
         gpu_cache: &mut GpuCache,
         resource_cache: &mut ResourceCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         clip_scroll_tree: &ClipScrollTree,
         device_pixel_ratio: f32,
         profile_counters: &mut FrameProfileCounters,
     ) -> bool {
         let stacking_context_index = *self.stacking_context_stack.last().unwrap();
-        let packed_layer_index =
-            match self.get_packed_layer_index_if_visible(&clip_and_scroll) {
-            Some(index) => index,
-            None => {
-                debug!("{:?} of invisible {:?}", base_prim_index, stacking_context_index);
-                return false;
-            }
-        };
+        let scroll_node = &clip_scroll_tree.nodes[&clip_and_scroll.scroll_node_id];
+        let clip_node = &clip_scroll_tree.nodes[&clip_and_scroll.clip_node_id()];
 
-        let (clip_chain, clip_bounds, coordinate_system_id) =
-            match clip_scroll_tree.nodes.get(&clip_and_scroll.clip_node_id()) {
-            Some(node) if node.combined_clip_outer_bounds != DeviceIntRect::zero() => {
-                let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
-                (
-                    node.clip_chain_node.clone(),
-                    node.combined_clip_outer_bounds,
-                    self.clip_scroll_group_store[group_id].coordinate_system_id,
-                )
-            }
-            _ => {
-                let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
-                self.clip_scroll_group_store[group_id].screen_bounding_rect = None;
-
-                debug!("{:?} of clipped out {:?}", base_prim_index, stacking_context_index);
-                return false;
-            }
-        };
+        if clip_node.combined_clip_outer_bounds == DeviceIntRect::zero() {
+            debug!("{:?} of clipped out {:?}", base_prim_index, stacking_context_index);
+            return false;
+        }
 
         let stacking_context = &mut self.stacking_context_store[stacking_context_index.0];
         let pipeline_id = {
             if !stacking_context.can_contribute_to_scene() {
                 return false;
             }
 
             // At least one primitive in this stacking context is visible, so the stacking
             // context is visible.
             stacking_context.is_visible = true;
             stacking_context.pipeline_id
         };
 
         debug!(
-            "\t{:?} of {:?} at {:?}",
+            "\t{:?} of {:?}",
             base_prim_index,
             stacking_context_index,
-            packed_layer_index
         );
 
-        let packed_layer = &self.packed_layers[packed_layer_index.0];
         let display_list = &pipelines
             .get(&pipeline_id)
             .expect("No display list?")
             .display_list;
 
-        if !stacking_context.is_backface_visible && packed_layer.transform.is_backface_visible() {
+        if !stacking_context.is_backface_visible && scroll_node.world_content_transform.is_backface_visible() {
             return false;
         }
 
         let prim_context = PrimitiveContext::new(
-            packed_layer_index,
-            packed_layer,
-            clip_and_scroll.clip_node_id(),
-            clip_chain,
-            clip_bounds,
-            coordinate_system_id,
             device_pixel_ratio,
             display_list,
-        );
-
-        debug!(
-            "\tclip_bounds {:?}, layer_local_clip {:?}",
-            prim_context.clip_bounds,
-            packed_layer.local_clip_rect
+            clip_node,
+            scroll_node,
         );
 
         for i in 0 .. prim_count {
             let prim_index = PrimitiveIndex(base_prim_index.0 + i);
 
             if let Some(prim_geom) = self.prim_store.prepare_prim_for_render(
                 prim_index,
                 &prim_context,
@@ -1566,86 +1469,31 @@ impl FrameBuilder {
                 parent.isolated_items_bounds = parent.isolated_items_bounds.union(&child_bounds);
             }
             // Per-primitive stacking context visibility checks do not take into account
             // visibility of child stacking contexts, so do that now.
             parent.is_visible = parent.is_visible || is_visible;
         }
     }
 
-    fn recalculate_clip_scroll_groups(
-        &mut self,
-        clip_scroll_tree: &ClipScrollTree,
-        screen_rect: &DeviceIntRect,
-        device_pixel_ratio: f32
-    ) {
-        debug!("recalculate_clip_scroll_groups");
-        for ref mut group in &mut self.clip_scroll_group_store {
-            let scroll_node = &clip_scroll_tree.nodes[&group.scroll_node_id];
-            let clip_node = &clip_scroll_tree.nodes[&group.clip_node_id];
-            let packed_layer = &mut self.packed_layers[group.packed_layer_index.0];
-
-            debug!(
-                "\tProcessing group scroll={:?}, clip={:?}",
-                group.scroll_node_id,
-                group.clip_node_id
-            );
-
-            let transform = scroll_node.world_content_transform;
-            if !packed_layer.set_transform(transform) {
-                group.screen_bounding_rect = None;
-                debug!("\t\tUnable to set transform {:?}", transform);
-                continue;
-            }
-
-            // Here we move the viewport rectangle into the coordinate system
-            // of the stacking context content.
-            let local_viewport_rect = clip_node
-                .combined_local_viewport_rect
-                .translate(&clip_node.reference_frame_relative_scroll_offset)
-                .translate(&-scroll_node.reference_frame_relative_scroll_offset)
-                .translate(&-scroll_node.scroll_offset());
-
-            group.screen_bounding_rect = packed_layer.set_rect(
-                &local_viewport_rect,
-                screen_rect,
-                device_pixel_ratio,
-            );
-
-            group.coordinate_system_id = scroll_node.coordinate_system_id;
-
-            debug!(
-                "\t\tlocal viewport {:?} screen bound {:?}",
-                local_viewport_rect,
-                group.screen_bounding_rect
-            );
-        }
-    }
-
     /// Compute the contribution (bounding rectangles, and resources) of layers and their
     /// primitives in screen space.
     fn build_layer_screen_rects_and_cull_layers(
         &mut self,
         screen_rect: &DeviceIntRect,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_ratio: f32,
     ) {
         profile_scope!("cull");
 
-        self.recalculate_clip_scroll_groups(
-            clip_scroll_tree,
-            screen_rect,
-            device_pixel_ratio
-        );
-
         debug!("processing commands...");
         let commands = mem::replace(&mut self.cmds, Vec::new());
         for cmd in &commands {
             match *cmd {
                 PrimitiveRunCmd::PushStackingContext(stacking_context_index) => {
                     self.handle_push_stacking_context(stacking_context_index)
                 }
                 PrimitiveRunCmd::PrimitiveRun(prim_index, prim_count, clip_and_scroll) => {
@@ -1834,23 +1682,25 @@ impl FrameBuilder {
                     let parent_isolation = sc_stack
                         .last()
                         .map(|index| self.stacking_context_store[index.0].isolation);
 
                     if stacking_context.isolation == ContextIsolation::Full && composite_count == 0
                     {
                         let mut prev_task = alpha_task_stack.pop().unwrap();
                         let screen_origin = current_task.as_alpha_batch().screen_origin;
+                        let current_task_size = current_task.get_dynamic_size();
                         let current_task_id = render_tasks.add(current_task);
                         let item = AlphaRenderItem::HardwareComposite(
                             stacking_context_index,
                             current_task_id,
                             HardwareCompositeOp::PremultipliedAlpha,
                             screen_origin,
                             next_z,
+                            current_task_size,
                         );
                         next_z += 1;
                         prev_task.as_alpha_batch_mut().items.push(item);
                         prev_task.children.push(current_task_id);
                         current_task = prev_task;
                     }
 
                     for filter in &stacking_context.composite_ops.filters {
@@ -1878,16 +1728,17 @@ impl FrameBuilder {
                                     stacking_context_index,
                                     blur_render_task_id,
                                     HardwareCompositeOp::PremultipliedAlpha,
                                     DeviceIntPoint::new(
                                         screen_origin.x - inflate_size as i32,
                                         screen_origin.y - inflate_size as i32,
                                     ),
                                     next_z,
+                                    render_tasks.get(current_task_id).get_dynamic_size(),
                                 );
                                 prev_task.as_alpha_batch_mut().items.push(item);
                                 prev_task.children.push(blur_render_task_id);
                                 current_task = prev_task;
                             }
                             _ => {
                                 let item = AlphaRenderItem::Blend(
                                     stacking_context_index,
@@ -1968,57 +1819,51 @@ impl FrameBuilder {
                         next_z += 1;
                     }
 
                     if stacking_context.is_pipeline_root &&
                         output_pipelines.contains(&stacking_context.pipeline_id)
                     {
                         let mut prev_task = alpha_task_stack.pop().unwrap();
                         let screen_origin = current_task.as_alpha_batch().screen_origin;
+                        let current_task_size = current_task.get_dynamic_size();
                         let current_task_id = render_tasks.add(current_task);
                         let item = AlphaRenderItem::HardwareComposite(
                             stacking_context_index,
                             current_task_id,
                             HardwareCompositeOp::PremultipliedAlpha,
                             screen_origin,
                             next_z,
+                            current_task_size,
                         );
                         next_z += 1;
                         prev_task.as_alpha_batch_mut().items.push(item);
                         prev_task.children.push(current_task_id);
                         current_task = prev_task;
                     }
                 }
                 PrimitiveRunCmd::PrimitiveRun(first_prim_index, prim_count, clip_and_scroll) => {
                     let stacking_context_index = *sc_stack.last().unwrap();
                     if !self.stacking_context_store[stacking_context_index.0].is_visible {
                         continue;
                     }
 
-                    let group_id = self.clip_scroll_group_indices[&clip_and_scroll];
-                    let group_index = ClipScrollGroupIndex(group_id, clip_and_scroll);
+                    debug!("\trun of {} items", prim_count);
 
-                    if self.clip_scroll_group_store[group_id]
-                        .screen_bounding_rect
-                        .is_none()
-                    {
-                        debug!("\tcs-group {:?} screen rect is None", group_index);
-                        continue;
-                    }
-
-                    debug!("\trun of {} items", prim_count);
+                    let scroll_node = &clip_scroll_tree.nodes[&clip_and_scroll.scroll_node_id];
+                    let clip_node = &clip_scroll_tree.nodes[&clip_and_scroll.clip_node_id()];
 
                     for i in 0 .. prim_count {
                         let prim_index = PrimitiveIndex(first_prim_index.0 + i);
 
                         if self.prim_store.cpu_metadata[prim_index.0].screen_rect.is_some() {
                             self.prim_store
                                 .add_render_tasks_for_prim(prim_index, &mut current_task);
                             let item =
-                                AlphaRenderItem::Primitive(Some(group_index), prim_index, next_z);
+                                AlphaRenderItem::Primitive(clip_node.id, scroll_node.id, prim_index, next_z);
                             current_task.as_alpha_batch_mut().items.push(item);
                             next_z += 1;
                         }
                     }
                 }
             }
         }
 
@@ -2053,24 +1898,26 @@ impl FrameBuilder {
         let screen_rect = DeviceIntRect::new(
             DeviceIntPoint::zero(),
             DeviceIntSize::new(
                 self.screen_size.width as i32,
                 self.screen_size.height as i32,
             ),
         );
 
+        let mut node_data = Vec::new();
+
         clip_scroll_tree.update_all_node_transforms(
             &screen_rect,
             device_pixel_ratio,
-            &mut self.packed_layers,
             &mut self.clip_store,
             resource_cache,
             gpu_cache,
-            pan
+            pan,
+            &mut node_data,
         );
 
         self.update_scroll_bars(clip_scroll_tree, gpu_cache);
 
         let mut render_tasks = RenderTaskTree::new();
 
         self.build_layer_screen_rects_and_cull_layers(
             &screen_rect,
@@ -2107,19 +1954,19 @@ impl FrameBuilder {
         }
 
         render_tasks.assign_to_passes(main_render_task_id, passes.len() - 1, &mut passes);
 
         for pass in &mut passes {
             let ctx = RenderTargetContext {
                 device_pixel_ratio,
                 stacking_context_store: &self.stacking_context_store,
-                clip_scroll_group_store: &self.clip_scroll_group_store,
                 prim_store: &self.prim_store,
                 resource_cache,
+                node_data: &node_data,
             };
 
             pass.build(
                 &ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
@@ -2141,15 +1988,15 @@ impl FrameBuilder {
         resource_cache.end_frame();
 
         Frame {
             device_pixel_ratio,
             background_color: self.background_color,
             window_size: self.screen_size,
             profile_counters,
             passes,
-            layer_texture_data: self.packed_layers.clone(),
+            node_data,
             render_tasks,
             deferred_resolves,
             gpu_cache_updates: Some(gpu_cache_updates),
         }
     }
 }
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -1,28 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::LayerRect;
+use api::{LayerVector2D, LayerRect, LayerToWorldTransform, WorldToLayerTransform};
 use gpu_cache::GpuCacheAddress;
 use render_task::RenderTaskAddress;
-use tiling::PackedLayerIndex;
 
 // Contains type that must exactly match the same structures declared in GLSL.
 
-#[derive(Debug, Copy, Clone)]
-pub struct PackedLayerAddress(i32);
-
-impl From<PackedLayerIndex> for PackedLayerAddress {
-    fn from(index: PackedLayerIndex) -> PackedLayerAddress {
-        PackedLayerAddress(index.0 as i32)
-    }
-}
-
 #[repr(i32)]
 #[derive(Debug, Copy, Clone)]
 pub enum BlurDirection {
     Horizontal = 0,
     Vertical,
 }
 
 #[derive(Debug)]
@@ -36,110 +26,119 @@ pub struct BlurInstance {
 
 /// A clipping primitive drawn into the clipping mask.
 /// Could be an image or a rectangle, which defines the
 /// way `address` is treated.
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub struct ClipMaskInstance {
     pub render_task_address: RenderTaskAddress,
-    pub layer_address: PackedLayerAddress,
+    pub scroll_node_id: ClipScrollNodeIndex,
     pub segment: i32,
     pub clip_data_address: GpuCacheAddress,
     pub resource_address: GpuCacheAddress,
 }
 
 // 32 bytes per instance should be enough for anyone!
 #[derive(Debug, Clone)]
 pub struct PrimitiveInstance {
     data: [i32; 8],
 }
 
 pub struct SimplePrimitiveInstance {
     pub specific_prim_address: GpuCacheAddress,
     pub task_address: RenderTaskAddress,
     pub clip_task_address: RenderTaskAddress,
-    pub layer_address: PackedLayerAddress,
+    pub clip_id: ClipScrollNodeIndex,
+    pub scroll_id: ClipScrollNodeIndex,
     pub z_sort_index: i32,
 }
 
 impl SimplePrimitiveInstance {
     pub fn new(
         specific_prim_address: GpuCacheAddress,
         task_address: RenderTaskAddress,
         clip_task_address: RenderTaskAddress,
-        layer_address: PackedLayerAddress,
+        clip_id: ClipScrollNodeIndex,
+        scroll_id: ClipScrollNodeIndex,
         z_sort_index: i32,
     ) -> SimplePrimitiveInstance {
         SimplePrimitiveInstance {
             specific_prim_address,
             task_address,
             clip_task_address,
-            layer_address,
+            clip_id,
+            scroll_id,
             z_sort_index,
         }
     }
 
     pub fn build(&self, data0: i32, data1: i32, data2: i32) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 self.specific_prim_address.as_int(),
                 self.task_address.0 as i32,
                 self.clip_task_address.0 as i32,
-                self.layer_address.0,
+                ((self.clip_id.0 as i32) << 16) | self.scroll_id.0 as i32,
                 self.z_sort_index,
                 data0,
                 data1,
                 data2,
             ],
         }
     }
 }
 
 pub struct CompositePrimitiveInstance {
     pub task_address: RenderTaskAddress,
     pub src_task_address: RenderTaskAddress,
     pub backdrop_task_address: RenderTaskAddress,
     pub data0: i32,
     pub data1: i32,
     pub z: i32,
+    pub data2: i32,
+    pub data3: i32,
 }
 
 impl CompositePrimitiveInstance {
     pub fn new(
         task_address: RenderTaskAddress,
         src_task_address: RenderTaskAddress,
         backdrop_task_address: RenderTaskAddress,
         data0: i32,
         data1: i32,
         z: i32,
+        data2: i32,
+        data3: i32,
     ) -> CompositePrimitiveInstance {
         CompositePrimitiveInstance {
             task_address,
             src_task_address,
             backdrop_task_address,
             data0,
             data1,
             z,
+            data2,
+            data3,
         }
     }
 }
 
 impl From<CompositePrimitiveInstance> for PrimitiveInstance {
     fn from(instance: CompositePrimitiveInstance) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 instance.task_address.0 as i32,
                 instance.src_task_address.0 as i32,
                 instance.backdrop_task_address.0 as i32,
                 instance.z,
                 instance.data0,
                 instance.data1,
-                0,
-                0,
+                instance.data2,
+                instance.data3,
             ],
         }
     }
 }
 
 // Whether this brush is being drawn on a Picture
 // task (new) or an alpha batch task (legacy).
 // Can be removed once everything uses pictures.
@@ -151,31 +150,32 @@ pub const BRUSH_FLAG_USES_PICTURE: i32 =
 //           future, we can compress this vertex
 //           format a lot - e.g. z, render task
 //           addresses etc can reasonably become
 //           a u16 type.
 #[repr(C)]
 pub struct BrushInstance {
     pub picture_address: RenderTaskAddress,
     pub prim_address: GpuCacheAddress,
-    pub layer_address: PackedLayerAddress,
+    pub clip_id: ClipScrollNodeIndex,
+    pub scroll_id: ClipScrollNodeIndex,
     pub clip_task_address: RenderTaskAddress,
     pub z: i32,
     pub flags: i32,
     pub user_data0: i32,
     pub user_data1: i32,
 }
 
 impl From<BrushInstance> for PrimitiveInstance {
     fn from(instance: BrushInstance) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 instance.picture_address.0 as i32,
                 instance.prim_address.as_int(),
-                instance.layer_address.0,
+                ((instance.clip_id.0 as i32) << 16) | instance.scroll_id.0 as i32,
                 instance.clip_task_address.0 as i32,
                 instance.z,
                 instance.flags,
                 instance.user_data0,
                 instance.user_data1,
             ]
         }
     }
@@ -185,8 +185,34 @@ impl From<BrushInstance> for PrimitiveIn
 // In the future, we may draw with segments for each portion
 // of the primitive, in which case this will be redundant.
 #[repr(C)]
 pub enum BrushImageKind {
     Simple = 0,     // A normal rect
     NinePatch = 1,  // A nine-patch image (stretch inside segments)
     Mirror = 2,     // A top left corner only (mirror across x/y axes)
 }
+
+#[derive(Copy, Debug, Clone)]
+#[repr(C)]
+pub struct ClipScrollNodeIndex(pub u32); // u32 wrapper; the type of BrushInstance::clip_id / scroll_id and ClipWorkItem::scroll_node_id
+
+#[derive(Debug)]
+#[repr(C)]
+pub struct ClipScrollNodeData { // C-layout record: two transforms, a local clip rect and two scroll offsets
+    pub transform: LayerToWorldTransform, // layer space -> world space
+    pub inv_transform: WorldToLayerTransform, // world space -> layer space; presumably the inverse of `transform` — confirm at the producer
+    pub local_clip_rect: LayerRect,
+    pub reference_frame_relative_scroll_offset: LayerVector2D,
+    pub scroll_offset: LayerVector2D,
+}
+
+impl ClipScrollNodeData {
+    pub fn invalid() -> ClipScrollNodeData { // placeholder value: identity transforms, zero rect, zero offsets
+        ClipScrollNodeData {
+            transform: LayerToWorldTransform::identity(),
+            inv_transform: WorldToLayerTransform::identity(),
+            local_clip_rect: LayerRect::zero(), // an empty clip rect
+            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
+            scroll_offset: LayerVector2D::zero(),
+        }
+    }
+}
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -282,19 +282,19 @@ impl FontContext {
                     bgra_pixels[i * 4 + 3] = alpha;
                 }
                 bgra_pixels
             }
             FontRenderMode::Subpixel => {
                 let length = pixels.len() / 3;
                 let mut bgra_pixels: Vec<u8> = vec![0; length * 4];
                 for i in 0 .. length {
-                    bgra_pixels[i * 4 + 0] = pixels[i * 3 + 0];
+                    bgra_pixels[i * 4 + 0] = pixels[i * 3 + 2];
                     bgra_pixels[i * 4 + 1] = pixels[i * 3 + 1];
-                    bgra_pixels[i * 4 + 2] = pixels[i * 3 + 2];
+                    bgra_pixels[i * 4 + 2] = pixels[i * 3 + 0];
                     bgra_pixels[i * 4 + 3] = 0xff;
                 }
                 bgra_pixels
             }
         }
     }
 
     pub fn is_bitmap_font(&mut self, _font: &FontInstance) -> bool {
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -13,17 +13,17 @@ use frame_builder::PrimitiveContext;
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use picture::PicturePrimitive;
 use render_task::{ClipWorkItem, ClipChainNode, RenderTask, RenderTaskId, RenderTaskTree};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use resource_cache::{ImageProperties, ResourceCache};
 use std::{mem, usize};
 use std::rc::Rc;
-use util::{MatrixHelpers, pack_as_float, recycle_vec, TransformedRect};
+use util::{pack_as_float, recycle_vec, MatrixHelpers, TransformedRect, TransformedRectKind};
 
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
     pub is_opaque: bool,
 }
 
 impl PrimitiveOpacity {
     pub fn opaque() -> PrimitiveOpacity {
@@ -1204,62 +1204,65 @@ impl PrimitiveStore {
         prim_context: &PrimitiveContext,
         prim_screen_rect: DeviceIntRect,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
     ) -> bool {
         let metadata = &mut self.cpu_metadata[prim_index.0];
+        let transform = &prim_context.scroll_node.world_content_transform;
+
         clip_store.get_mut(&metadata.clip_sources).update(
-            &prim_context.packed_layer.transform,
+            transform,
             gpu_cache,
             resource_cache,
             prim_context.device_pixel_ratio,
         );
 
         // Try to create a mask if we may need to.
         let prim_clips = clip_store.get(&metadata.clip_sources);
-        let is_axis_aligned = prim_context.packed_layer.transform.preserves_2d_axis_alignment();
-        let clip_task = if prim_context.clip_chain.is_some() || prim_clips.is_masking() {
+        let is_axis_aligned = transform.transform_kind() == TransformedRectKind::AxisAligned;
+
+        let clip_task = if prim_context.clip_node.clip_chain_node.is_some() || prim_clips.is_masking() {
             // Take into account the actual clip info of the primitive, and
             // mutate the current bounds accordingly.
             let mask_rect = match prim_clips.bounds.outer {
                 Some(ref outer) => match prim_screen_rect.intersection(&outer.device_rect) {
                     Some(rect) => rect,
                     None => {
                         metadata.screen_rect = None;
                         return false;
                     }
                 },
                 _ => prim_screen_rect,
             };
 
             let extra_clip = if prim_clips.is_masking() {
                 Some(Rc::new(ClipChainNode {
                     work_item: ClipWorkItem {
-                        layer_index: prim_context.packed_layer_index,
+                        scroll_node_id: prim_context.scroll_node.id,
                         clip_sources: metadata.clip_sources.weak(),
-                        coordinate_system_id: prim_context.coordinate_system_id,
+                        coordinate_system_id: prim_context.scroll_node.coordinate_system_id,
                     },
                     prev: None,
                 }))
             } else {
                 None
             };
 
             RenderTask::new_mask(
                 None,
                 mask_rect,
-                prim_context.clip_chain.clone(),
+                prim_context.clip_node.clip_chain_node.clone(),
                 extra_clip,
                 prim_screen_rect,
                 clip_store,
                 is_axis_aligned,
-                prim_context.coordinate_system_id,
+                prim_context.scroll_node.coordinate_system_id,
             )
         } else {
             None
         };
 
         metadata.clip_task_id = clip_task.map(|clip_task| render_tasks.add(clip_task));
         true
     }
@@ -1279,39 +1282,38 @@ impl PrimitiveStore {
 
             if metadata.local_rect.size.width <= 0.0 ||
                metadata.local_rect.size.height <= 0.0 {
                 warn!("invalid primitive rect {:?}", metadata.local_rect);
                 return None;
             }
 
             if !metadata.is_backface_visible &&
-               prim_context.packed_layer.transform.is_backface_visible() {
+               prim_context.scroll_node.world_content_transform.is_backface_visible() {
                 return None;
             }
 
             let local_rect = metadata
                 .local_rect
-                .intersection(&metadata.local_clip_rect)
-                .and_then(|rect| rect.intersection(&prim_context.packed_layer.local_clip_rect));
+                .intersection(&metadata.local_clip_rect);
 
             let local_rect = match local_rect {
                 Some(local_rect) => local_rect,
                 None => return None,
             };
 
             let xf_rect = TransformedRect::new(
                 &local_rect,
-                &prim_context.packed_layer.transform,
+                &prim_context.scroll_node.world_content_transform,
                 prim_context.device_pixel_ratio
             );
 
-            metadata.screen_rect = xf_rect
-                .bounding_rect
-                .intersection(&prim_context.clip_bounds);
+            let clip_bounds = &prim_context.clip_node.combined_clip_outer_bounds;
+            metadata.screen_rect = xf_rect.bounding_rect
+                                          .intersection(clip_bounds);
 
             let geometry = match metadata.screen_rect {
                 Some(device_rect) => Geometry {
                     local_rect,
                     device_rect,
                 },
                 None => return None,
             };
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -321,29 +321,31 @@ impl FrameProfileCounters {
             color_targets: IntProfileCounter::new("Color Targets"),
             alpha_targets: IntProfileCounter::new("Alpha Targets"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct TextureCacheProfileCounters {
-    pub pages_a8: ResourceProfileCounter,
-    pub pages_rgb8: ResourceProfileCounter,
-    pub pages_rgba8: ResourceProfileCounter,
-    pub pages_rg8: ResourceProfileCounter,
+    pub pages_a8_linear: ResourceProfileCounter, // every counter name now carries a `_linear` / `_nearest` suffix; presumably the texture filter — confirm in texture_cache.rs
+    pub pages_rgb8_linear: ResourceProfileCounter,
+    pub pages_rgba8_linear: ResourceProfileCounter, // RGBA8 is the only format with two counters
+    pub pages_rgba8_nearest: ResourceProfileCounter,
+    pub pages_rg8_linear: ResourceProfileCounter,
 }
 
 impl TextureCacheProfileCounters {
     pub fn new() -> Self {
         TextureCacheProfileCounters {
-            pages_a8: ResourceProfileCounter::new("Texture A8 cached pages"),
-            pages_rgb8: ResourceProfileCounter::new("Texture RGB8 cached pages"),
-            pages_rgba8: ResourceProfileCounter::new("Texture RGBA8 cached pages"),
-            pages_rg8: ResourceProfileCounter::new("Texture RG8 cached pages"),
+            pages_a8_linear: ResourceProfileCounter::new("Texture A8 cached pages"), // label text is unchanged by the field rename
+            pages_rgb8_linear: ResourceProfileCounter::new("Texture RGB8 cached pages"),
+            pages_rgba8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)"), // "(L)" labels the linear RGBA8 counter
+            pages_rgba8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)"), // "(N)" labels the nearest RGBA8 counter
+            pages_rg8_linear: ResourceProfileCounter::new("Texture RG8 cached pages"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct GpuCacheProfileCounters {
     pub allocated_rows: IntProfileCounter,
     pub allocated_blocks: IntProfileCounter,
@@ -837,20 +839,21 @@ impl Profiler {
                 &backend_profile.resources.image_templates,
             ],
             debug_renderer,
             true,
         );
 
         self.draw_counters(
             &[
-                &backend_profile.resources.texture_cache.pages_a8,
-                &backend_profile.resources.texture_cache.pages_rgb8,
-                &backend_profile.resources.texture_cache.pages_rgba8,
-                &backend_profile.resources.texture_cache.pages_rg8,
+                &backend_profile.resources.texture_cache.pages_a8_linear,
+                &backend_profile.resources.texture_cache.pages_rgb8_linear,
+                &backend_profile.resources.texture_cache.pages_rgba8_linear,
+                &backend_profile.resources.texture_cache.pages_rgba8_nearest,
+                &backend_profile.resources.texture_cache.pages_rg8_linear,
                 &backend_profile.ipc.display_lists,
             ],
             debug_renderer,
             true,
         );
 
         self.draw_counters(
             &[
--- a/gfx/webrender/src/record.rs
+++ b/gfx/webrender/src/record.rs
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::ApiMsg;
+use api::{ApiMsg, DocumentMsg};
 use bincode::{serialize, Infinite};
 use byteorder::{LittleEndian, WriteBytesExt};
 use std::any::TypeId;
 use std::fmt::Debug;
 use std::fs::File;
 use std::io::Write;
 use std::mem;
 use std::path::PathBuf;
@@ -60,13 +60,19 @@ impl ApiRecordingReceiver for BinaryReco
         self.write_length_and_data(data);
     }
 }
 
 pub fn should_record_msg(msg: &ApiMsg) -> bool {
     match *msg {
         ApiMsg::UpdateResources(..) |
         ApiMsg::AddDocument { .. } |
-        ApiMsg::UpdateDocument(..) |
         ApiMsg::DeleteDocument(..) => true,
+        ApiMsg::UpdateDocument(_, ref msg) => { // document updates are now filtered by their inner message; this `msg` shadows the parameter
+            match *msg {
+                DocumentMsg::GetScrollNodeState(..) | // excluded from the recording
+                DocumentMsg::HitTest(..) => false, // excluded too; presumably both are read-only queries — confirm
+                _ => true, // every other document message is still recorded
+            }
+        }
         _ => false,
     }
 }
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -3,21 +3,22 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{ColorF, FilterOp, LayerPoint, MixBlendMode};
 use api::{LayerRect, PipelineId};
 use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use gpu_cache::GpuCacheHandle;
+use gpu_types::{ClipScrollNodeIndex};
 use internal_types::HardwareCompositeOp;
 use prim_store::PrimitiveIndex;
 use std::{cmp, usize, f32, i32};
 use std::rc::Rc;
-use tiling::{ClipScrollGroupIndex, PackedLayerIndex, RenderPass, RenderTargetIndex};
+use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind, StackingContextIndex};
 
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub struct RenderTaskId(pub u32); // TODO(gw): Make private when using GPU cache!
 
 #[derive(Debug, Copy, Clone)]
@@ -146,32 +147,33 @@ pub enum RenderTaskKey {
 #[derive(Debug)]
 pub enum RenderTaskLocation {
     Fixed,
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
 }
 
 #[derive(Debug)]
 pub enum AlphaRenderItem {
-    Primitive(Option<ClipScrollGroupIndex>, PrimitiveIndex, i32),
+    Primitive(ClipScrollNodeIndex, ClipScrollNodeIndex, PrimitiveIndex, i32), // two node indices replace the optional group index; NOTE(review): which one is clip and which is scroll is not visible here — confirm at the construction site
     Blend(StackingContextIndex, RenderTaskId, FilterOp, i32),
     Composite(
         StackingContextIndex,
         RenderTaskId,
         RenderTaskId,
         MixBlendMode,
         i32,
     ),
     SplitComposite(StackingContextIndex, RenderTaskId, GpuCacheHandle, i32),
     HardwareComposite(
         StackingContextIndex,
         RenderTaskId,
         HardwareCompositeOp,
         DeviceIntPoint,
         i32,
+        DeviceIntSize, // new trailing size field; NOTE(review): its meaning is not visible in this hunk — confirm at the producer
     ),
 }
 
 #[derive(Debug)]
 pub struct AlphaRenderTask {
     pub screen_origin: DeviceIntPoint,
     pub items: Vec<AlphaRenderItem>,
     // If this render task is a registered frame output, this
@@ -195,17 +197,17 @@ pub enum MaskSegment {
 pub enum MaskGeometryKind {
     Default, // Draw the entire rect
     CornersOnly, // Draw the corners (simple axis aligned mask)
              // TODO(gw): Add more types here (e.g. 4 rectangles outside the inner rect)
 }
 
 #[derive(Debug, Clone)]
 pub struct ClipWorkItem {
-    pub layer_index: PackedLayerIndex,
+    pub scroll_node_id: ClipScrollNodeIndex, // prim_store.rs fills this from `prim_context.scroll_node.id`
     pub clip_sources: ClipSourcesWeakHandle,
     pub coordinate_system_id: CoordinateSystemId,
 }
 
 impl ClipWorkItem {
     fn get_geometry_kind(
         &self,
         clip_store: &ClipStore,
@@ -263,32 +265,34 @@ pub struct PictureTask {
 }
 
 #[derive(Debug)]
 pub struct BlurTask {
     pub blur_std_deviation: f32,
     pub target_kind: RenderTargetKind,
     pub regions: Vec<LayerRect>,
     pub color: ColorF,
+    pub scale_factor: f32, // source task width divided by the blur target width, as computed in RenderTask::new_blur
 }
 
 #[derive(Debug)]
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
 #[derive(Debug)]
 pub enum RenderTaskKind {
     Alpha(AlphaRenderTask),
     Picture(PictureTask),
     CacheMask(CacheMaskTask),
     VerticalBlur(BlurTask),
     HorizontalBlur(BlurTask),
     Readback(DeviceIntRect),
     Alias(RenderTaskId),
+    Scaling(RenderTargetKind), // built by RenderTask::new_scaling; the payload is returned as the task's target kind
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum ClearMode {
     // Applicable to color and alpha targets.
     Zero,
     One,
 
@@ -442,94 +446,141 @@ impl RenderTask {
                 clips,
                 geometry_kind,
                 coordinate_system_id: prim_coordinate_system_id,
             }),
             clear_mode: ClearMode::One,
         })
     }
 
-    // Construct a render task to apply a blur to a primitive. For now,
-    // this is only used for text runs, but we can probably extend this
-    // to handle general blurs to any render task in the future.
+    // Construct a render task to apply a blur to a primitive. 
     // The render task chain that is constructed looks like:
     //
-    //    PrimitiveCacheTask: Draw the text run.
+    //    PrimitiveCacheTask: Draw the primitives.
     //           ^
     //           |
+    //    DownscalingTask(s): Each downscaling task halves the size of the render
+    //           ^            target and halves the std deviation. Tasks are added
+    //           |            until the std deviation is no greater than 4.0, or the
+    //           |            target is smaller than 128 in either dimension.
+    //           |
     //    VerticalBlurTask: Apply the separable vertical blur to the primitive.
     //           ^
     //           |
     //    HorizontalBlurTask: Apply the separable horizontal blur to the vertical blur.
     //           |
     //           +---- This is stored as the input task to the primitive shader.
     //
     pub fn new_blur(
         blur_std_deviation: f32,
         src_task_id: RenderTaskId,
         render_tasks: &mut RenderTaskTree,
         target_kind: RenderTargetKind,
         regions: &[LayerRect],
         clear_mode: ClearMode,
         color: ColorF,
     ) -> RenderTask {
+        // Adjust large std deviation value.
+        const MAX_BLUR_STD_DEVIATION: f32 = 4.0; // downscaling continues while the std deviation exceeds this
+        const MIN_DOWNSCALING_RT_SIZE: i32 = 128; // downscaling stops once either target dimension is below this
+        let mut adjusted_blur_std_deviation = blur_std_deviation;
         let blur_target_size = render_tasks.get(src_task_id).get_dynamic_size();
+        let mut adjusted_blur_target_size = blur_target_size;
+        let mut downscaling_src_task_id = src_task_id; // tail of the chain; stays `src_task_id` when no downscaling happens
+        let mut scale_factor = 1.0;
+        while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
+            if adjusted_blur_target_size.width < MIN_DOWNSCALING_RT_SIZE ||
+               adjusted_blur_target_size.height < MIN_DOWNSCALING_RT_SIZE {
+                break; // target already too small; blur with the remaining (larger) std deviation
+            }
+            adjusted_blur_std_deviation *= 0.5;
+            scale_factor *= 2.0;
+            adjusted_blur_target_size = (blur_target_size.to_f32() / scale_factor).to_i32(); // derived from the original size each time, not from the previous step
+            let downscaling_task = RenderTask::new_scaling(
+                target_kind,
+                downscaling_src_task_id,
+                adjusted_blur_target_size
+            );
+            downscaling_src_task_id = render_tasks.add(downscaling_task); // each scaling task takes the previous one as its child
+        }
+        scale_factor = blur_target_size.width as f32 / adjusted_blur_target_size.width as f32; // recomputed from the integer widths left after `to_i32()`; NOTE(review): 0/0 gives NaN if the source width is 0 (get_dynamic_size() returns zero for a Fixed location) — confirm that cannot reach here
 
         let blur_task_v = RenderTask {
             cache_key: None,
-            children: vec![src_task_id],
-            location: RenderTaskLocation::Dynamic(None, blur_target_size),
+            children: vec![downscaling_src_task_id], // the vertical pass reads the last downscaled task, or the source itself
+            location: RenderTaskLocation::Dynamic(None, adjusted_blur_target_size),
             kind: RenderTaskKind::VerticalBlur(BlurTask {
-                blur_std_deviation,
+                blur_std_deviation: adjusted_blur_std_deviation, // the halved value, matching the downscaled target
                 target_kind,
                 regions: regions.to_vec(),
                 color,
+                scale_factor,
             }),
             clear_mode,
         };
 
         let blur_task_v_id = render_tasks.add(blur_task_v);
 
         let blur_task_h = RenderTask {
             cache_key: None,
             children: vec![blur_task_v_id],
-            location: RenderTaskLocation::Dynamic(None, blur_target_size),
+            location: RenderTaskLocation::Dynamic(None, adjusted_blur_target_size), // same downscaled size as the vertical pass
             kind: RenderTaskKind::HorizontalBlur(BlurTask {
-                blur_std_deviation,
+                blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 regions: regions.to_vec(),
                 color,
+                scale_factor, // both passes carry the same factor
             }),
             clear_mode,
         };
 
         blur_task_h
     }
 
+    pub fn new_scaling( // builds an uncached Scaling task of `target_size` whose only child is `src_task_id`
+        target_kind: RenderTargetKind,
+        src_task_id: RenderTaskId,
+        target_size: DeviceIntSize,
+    ) -> RenderTask {
+        RenderTask {
+            cache_key: None,
+            children: vec![src_task_id],
+            location: RenderTaskLocation::Dynamic(None, target_size), // no fixed position is assigned here (first field is None)
+            kind: RenderTaskKind::Scaling(target_kind),
+            clear_mode: match target_kind { // the clear mode depends only on the target kind
+                RenderTargetKind::Color => ClearMode::Transparent,
+                RenderTargetKind::Alpha => ClearMode::One,
+            },
+        }
+    }
+
     pub fn as_alpha_batch_mut<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref mut task) => task,
             RenderTaskKind::Picture(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Alias(..) => unreachable!(),
+            RenderTaskKind::Alias(..) |
+            RenderTaskKind::Scaling(..) => unreachable!(), // Scaling joins the kinds that hold no AlphaRenderTask; calling this on one panics
         }
     }
 
     pub fn as_alpha_batch<'a>(&'a self) -> &'a AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref task) => task,
             RenderTaskKind::Picture(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Alias(..) => unreachable!(),
+            RenderTaskKind::Alias(..) |
+            RenderTaskKind::Scaling(..) => unreachable!(), // Scaling joins the kinds that hold no AlphaRenderTask; calling this on one panics
         }
     }
 
     // Write (up to) 8 floats of data specific to the type
     // of render task that is provided to the GPU shaders
     // via a vertex texture.
     pub fn write_task_data(&self) -> RenderTaskData {
         // NOTE: The ordering and layout of these structures are
@@ -604,26 +655,27 @@ impl RenderTask {
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
                         task_info.blur_std_deviation,
-                        0.0,
+                        task_info.scale_factor,
                         0.0,
                         task_info.color.r,
                         task_info.color.g,
                         task_info.color.b,
                         task_info.color.a,
                     ],
                 }
             }
-            RenderTaskKind::Readback(..) => {
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Scaling(..) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
@@ -657,17 +709,18 @@ impl RenderTask {
                 }
             }
 
             RenderTaskKind::Readback(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::Picture(..) |
-            RenderTaskKind::Alias(..) => {
+            RenderTaskKind::Alias(..) |
+            RenderTaskKind::Scaling(..) => {
                 panic!("bug: inflate only supported for alpha tasks");
             }
         }
     }
 
     pub fn get_dynamic_size(&self) -> DeviceIntSize {
         match self.location {
             RenderTaskLocation::Fixed => DeviceIntSize::zero(),
@@ -695,16 +748,20 @@ impl RenderTask {
                 RenderTargetKind::Alpha
             }
 
             RenderTaskKind::VerticalBlur(ref task_info) |
             RenderTaskKind::HorizontalBlur(ref task_info) => {
                 task_info.target_kind
             }
 
+            RenderTaskKind::Scaling(target_kind) => {
+                target_kind
+            }
+
             RenderTaskKind::Picture(ref task_info) => {
                 task_info.target_kind
             }
 
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: target_kind() called on invalidated task");
             }
         }
@@ -717,17 +774,18 @@ impl RenderTask {
     // trivially extended to also support RGBA8 targets in the future
     // if we decide that is useful.
     pub fn is_shared(&self) -> bool {
         match self.kind {
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::Picture(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) => false,
+            RenderTaskKind::HorizontalBlur(..) |
+            RenderTaskKind::Scaling(..) => false,
 
             RenderTaskKind::CacheMask(..) => true,
 
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: is_shared() called on aliased task");
             }
         }
     }
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -5,17 +5,17 @@
 //! The webrender API.
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{channel, BlobImageRenderer, FontRenderMode};
-use api::{ColorF, Epoch, PipelineId, RenderApiSender, RenderNotifier};
+use api::{ColorF, ColorU, Epoch, PipelineId, RenderApiSender, RenderNotifier};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageId, ExternalImageType, ImageFormat};
 use api::{YUV_COLOR_SPACES, YUV_FORMATS};
 use api::{YuvColorSpace, YuvFormat};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
@@ -57,17 +57,17 @@ use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget, RenderTargetKind};
-use tiling::{BatchKey, BatchKind, BrushBatchKind, Frame, RenderTarget, TransformBatchKind};
+use tiling::{BatchKey, BatchKind, BrushBatchKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
     label: "B_Mask",
     color: debug_colors::BLACK,
@@ -216,22 +216,23 @@ bitflags! {
 
 // A generic mode that can be passed to shaders to change
 // behaviour per draw-call.
 type ShaderMode = i32;
 
 #[repr(C)]
 enum TextShaderMode {
     Alpha = 0,
-    SubpixelPass0 = 1,
-    SubpixelPass1 = 2,
-    SubpixelWithBgColorPass0 = 3,
-    SubpixelWithBgColorPass1 = 4,
-    SubpixelWithBgColorPass2 = 5,
-    ColorBitmap = 6,
+    SubpixelOpaque = 1, // new mode inserted here; every later variant moves up by one
+    SubpixelPass0 = 2,
+    SubpixelPass1 = 3,
+    SubpixelWithBgColorPass0 = 4,
+    SubpixelWithBgColorPass1 = 5,
+    SubpixelWithBgColorPass2 = 6,
+    ColorBitmap = 7, // NOTE(review): these values presumably must match the mode constants in the text shader — confirm against ps_text_run.glsl
 }
 
 impl Into<ShaderMode> for TextShaderMode {
     fn into(self) -> i32 {
         self as i32
     }
 }
 
@@ -250,17 +251,17 @@ impl From<GlyphFormat> for TextShaderMod
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum TextureSampler {
     Color0,
     Color1,
     Color2,
     CacheA8,
     CacheRGBA8,
     ResourceCache,
-    Layers,
+    ClipScrollNodes, // keeps TextureSlot(6); bound to the shader sampler named "sClipScrollNodes"
     RenderTasks,
     Dither,
     // A special sampler that is bound to the A8 output of
     // the *first* pass. Items rendered in this target are
     // available as inputs to tasks in any subsequent pass.
     SharedCacheA8,
 }
 
@@ -281,17 +282,17 @@ impl Into<TextureSlot> for TextureSample
     fn into(self) -> TextureSlot {
         match self {
             TextureSampler::Color0 => TextureSlot(0),
             TextureSampler::Color1 => TextureSlot(1),
             TextureSampler::Color2 => TextureSlot(2),
             TextureSampler::CacheA8 => TextureSlot(3),
             TextureSampler::CacheRGBA8 => TextureSlot(4),
             TextureSampler::ResourceCache => TextureSlot(5),
-            TextureSampler::Layers => TextureSlot(6),
+            TextureSampler::ClipScrollNodes => TextureSlot(6),
             TextureSampler::RenderTasks => TextureSlot(7),
             TextureSampler::Dither => TextureSlot(8),
             TextureSampler::SharedCacheA8 => TextureSlot(9),
         }
     }
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -635,17 +636,18 @@ impl SourceTextureResolver {
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BlendMode {
     None,
     Alpha,
     PremultipliedAlpha,
     PremultipliedDestOut,
-    Subpixel,
+    SubpixelOpaque(ColorU), // carries a colour payload; NOTE(review): presumably the text colour — confirm at the use site
+    SubpixelWithAlpha, // BrushShader selects its `alpha` program for both new subpixel variants
     SubpixelWithBgColor,
 }
 
 // Tracks the state of each row in the GPU cache texture.
 struct CacheRow {
     is_dirty: bool,
 }
 
@@ -1008,17 +1010,18 @@ impl BrushShader {
     ) where M: Into<ShaderMode> {
         match blend_mode {
             BlendMode::None => {
                 self.opaque.bind(device, projection, mode, renderer_errors)
             }
             BlendMode::Alpha |
             BlendMode::PremultipliedAlpha |
             BlendMode::PremultipliedDestOut |
-            BlendMode::Subpixel |
+            BlendMode::SubpixelOpaque(..) |
+            BlendMode::SubpixelWithAlpha |
             BlendMode::SubpixelWithBgColor => {
                 self.alpha.bind(device, projection, mode, renderer_errors)
             }
         }
     }
 
     fn deinit(self, device: &mut Device) {
         self.opaque.deinit(device);
@@ -1121,17 +1124,17 @@ fn create_prim_shader(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sColor1", TextureSampler::Color1),
                 ("sColor2", TextureSampler::Color2),
                 ("sDither", TextureSampler::Dither),
                 ("sCacheA8", TextureSampler::CacheA8),
                 ("sCacheRGBA8", TextureSampler::CacheRGBA8),
-                ("sLayers", TextureSampler::Layers),
+                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
                 ("sRenderTasks", TextureSampler::RenderTasks),
                 ("sResourceCache", TextureSampler::ResourceCache),
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
             ],
         );
     }
 
     program
@@ -1148,17 +1151,17 @@ fn create_clip_shader(name: &'static str
 
     let program = device.create_program(name, &prefix, &DESC_CLIP);
 
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
-                ("sLayers", TextureSampler::Layers),
+                ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
                 ("sRenderTasks", TextureSampler::RenderTasks),
                 ("sResourceCache", TextureSampler::ResourceCache),
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
             ],
         );
     }
 
     program
@@ -1249,17 +1252,17 @@ pub struct Renderer {
     color_render_targets: Vec<Texture>,
     alpha_render_targets: Vec<Texture>,
 
     gpu_profile: GpuProfiler<GpuProfileTag>,
     prim_vao: VAO,
     blur_vao: VAO,
     clip_vao: VAO,
 
-    layer_texture: VertexDataTexture,
+    node_data_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     gpu_cache_texture: CacheTexture,
 
     pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
 
     // Manages and resolves source textures IDs to real texture IDs.
     texture_resolver: SourceTextureResolver,
 
@@ -1755,17 +1758,17 @@ impl Renderer {
 
         let blur_vao = device.create_vao_with_new_instances(&DESC_BLUR, &prim_vao);
         let clip_vao = device.create_vao_with_new_instances(&DESC_CLIP, &prim_vao);
 
         let texture_cache_upload_pbo = device.create_pbo();
 
         let texture_resolver = SourceTextureResolver::new(&mut device);
 
-        let layer_texture = VertexDataTexture::new(&mut device);
+        let node_data_texture = VertexDataTexture::new(&mut device);
         let render_task_texture = VertexDataTexture::new(&mut device);
 
         device.end_frame();
 
         let backend_notifier = notifier.clone();
 
         let default_font_render_mode = match (options.enable_aa, options.enable_subpixel_aa) {
             (true, true) => FontRenderMode::Subpixel,
@@ -1863,17 +1866,17 @@ impl Renderer {
             enable_clear_scissor: options.enable_clear_scissor,
             last_time: 0,
             color_render_targets: Vec::new(),
             alpha_render_targets: Vec::new(),
             gpu_profile,
             prim_vao,
             blur_vao,
             clip_vao,
-            layer_texture,
+            node_data_texture,
             render_task_texture,
             pipeline_epoch_map: FastHashMap::default(),
             dither_matrix_texture,
             external_image_handler: None,
             output_image_handler: None,
             output_targets: FastHashMap::default(),
             cpu_profiles: VecDeque::new(),
             gpu_profiles: VecDeque::new(),
@@ -2487,17 +2490,18 @@ impl Renderer {
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
                 TransformBatchKind::Rectangle(needs_clipping) => {
                     debug_assert!(
                         !needs_clipping || match key.blend_mode {
                             BlendMode::Alpha |
                             BlendMode::PremultipliedAlpha |
                             BlendMode::PremultipliedDestOut |
-                            BlendMode::Subpixel |
+                            BlendMode::SubpixelOpaque(..) |
+                            BlendMode::SubpixelWithAlpha |
                             BlendMode::SubpixelWithBgColor => true,
                             BlendMode::None => false,
                         }
                     );
 
                     if needs_clipping {
                         self.ps_rectangle_clip.bind(
                             &mut self.device,
@@ -2684,16 +2688,40 @@ impl Renderer {
             }
             _ => {}
         }
 
         let _gm = self.gpu_profile.add_marker(marker);
         self.draw_instanced_batch(instances, VertexArrayKind::Primitive, &key.textures);
     }
 
+    // Blit every scaling task's source rect out of the resolved `source` cache
+    // texture into the target rect of its destination task.
+    fn handle_scaling(
+        &mut self,
+        render_tasks: &RenderTaskTree,
+        scalings: &[ScalingInfo],
+        source: SourceTexture,
+    ) {
+        let cache_texture = self.texture_resolver
+            .resolve(&source)
+            .expect("BUG: scaling source texture must be resolvable");
+        for scaling in scalings {
+            let (source_rect, source_layer) =
+                render_tasks.get(scaling.src_task_id).get_target_rect();
+            let (dest_rect, _) = render_tasks.get(scaling.dest_task_id).get_target_rect();
+
+            // Read from the layer reported by the source task's target rect.
+            self.device
+                .bind_read_target(Some((cache_texture, source_layer.0 as i32)));
+
+            self.device.blit_render_target(source_rect, dest_rect);
+        }
+    }
+
     fn draw_color_target(
         &mut self,
         render_target: Option<(&Texture, i32)>,
         target: &ColorRenderTarget,
         target_size: DeviceUintSize,
         clear_color: Option<[f32; 4]>,
         render_tasks: &RenderTaskTree,
         projection: &Transform3D<f32>,
@@ -2750,16 +2778,18 @@ impl Renderer {
                 self.draw_instanced_batch(
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                 );
             }
         }
 
+        self.handle_scaling(render_tasks, &target.scalings, SourceTexture::CacheRGBA8);
+
         // Draw any textrun caches for this target. For now, this
         // is only used to cache text runs that are to be blurred
         // for shadow support. In the future it may be worth
         // considering using this for (some) other text runs, since
         // it removes the overhead of submitting many small glyphs
         // to multiple tiles in the normal text run case.
         if !target.text_run_cache_prims.is_empty() {
             self.device.set_blend(true);
@@ -2827,22 +2857,23 @@ impl Renderer {
             }
 
             self.device.disable_depth_write();
             self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
 
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
                 if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
-                        BlendMode::None => ColorF::new(0.3, 0.3, 0.3, 1.0),
-                        BlendMode::Alpha => ColorF::new(0.0, 0.9, 0.1, 1.0),
-                        BlendMode::PremultipliedAlpha => ColorF::new(0.0, 0.3, 0.7, 1.0),
-                        BlendMode::PremultipliedDestOut => ColorF::new(0.6, 0.2, 0.0, 1.0),
-                        BlendMode::Subpixel => ColorF::new(0.5, 0.0, 0.4, 1.0),
-                        BlendMode::SubpixelWithBgColor => ColorF::new(0.6, 0.0, 0.5, 1.0),
+                        BlendMode::None => debug_colors::BLACK,
+                        BlendMode::Alpha => debug_colors::YELLOW,
+                        BlendMode::PremultipliedAlpha => debug_colors::GREY,
+                        BlendMode::PremultipliedDestOut => debug_colors::SALMON,
+                        BlendMode::SubpixelOpaque(..) => debug_colors::GREEN,
+                        BlendMode::SubpixelWithAlpha => debug_colors::RED,
+                        BlendMode::SubpixelWithBgColor => debug_colors::BLUE,
                     }.into();
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
                     }
                 }
 
                 match batch.key.kind {
                     BatchKind::Transformable(transform_kind, TransformBatchKind::TextRun(glyph_format)) => {
@@ -2870,17 +2901,34 @@ impl Renderer {
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures
                                 );
                             }
-                            BlendMode::Subpixel => {
+                            BlendMode::SubpixelOpaque(color) => {
+                                self.device.set_blend_mode_subpixel_opaque(color.into());
+
+                                self.ps_text_run.bind(
+                                    &mut self.device,
+                                    transform_kind,
+                                    projection,
+                                    TextShaderMode::SubpixelOpaque,
+                                    &mut self.renderer_errors,
+                                );
+
+                                self.draw_instanced_batch(
+                                    &batch.instances,
+                                    VertexArrayKind::Primitive,
+                                    &batch.key.textures
+                                );
+                            }
+                            BlendMode::SubpixelWithAlpha => {
                                 // Using the two pass component alpha rendering technique:
                                 //
                                 // http://anholt.livejournal.com/32058.html
                                 //
                                 self.device.set_blend_mode_subpixel_pass0();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
@@ -2986,17 +3034,19 @@ impl Renderer {
                                 BlendMode::PremultipliedAlpha => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_premultiplied_alpha();
                                 }
                                 BlendMode::PremultipliedDestOut => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_premultiplied_dest_out();
                                 }
-                                BlendMode::Subpixel | BlendMode::SubpixelWithBgColor => {
+                                BlendMode::SubpixelOpaque(..) |
+                                BlendMode::SubpixelWithAlpha |
+                                BlendMode::SubpixelWithBgColor => {
                                     unreachable!("bug: subpx text handled earlier");
                                 }
                             }
                             prev_blend_mode = batch.key.blend_mode;
                         }
 
                         self.submit_batch(
                             &batch.key,
@@ -3109,16 +3159,18 @@ impl Renderer {
                 self.draw_instanced_batch(
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                 );
             }
         }
 
+        self.handle_scaling(render_tasks, &target.scalings, SourceTexture::CacheA8);
+
         if !target.brush_mask_corners.is_empty() {
             self.device.set_blend(false);
 
             let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
             self.brush_mask_corner
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
                 &target.brush_mask_corners,
@@ -3344,31 +3396,31 @@ impl Renderer {
             if let Some(texture) = pass.alpha_texture.as_mut() {
                 debug_assert!(pass.max_alpha_target_size.width > 0);
                 debug_assert!(pass.max_alpha_target_size.height > 0);
                 self.device.init_texture(
                     texture,
                     pass.max_alpha_target_size.width,
                     pass.max_alpha_target_size.height,
                     ImageFormat::A8,
-                    TextureFilter::Nearest,
+                    TextureFilter::Linear,
                     RenderTargetMode::RenderTarget,
                     alpha_target_count as i32,
                     None,
                 );
             }
         }
 
-        self.layer_texture
-            .update(&mut self.device, &mut frame.layer_texture_data);
+        self.node_data_texture
+            .update(&mut self.device, &mut frame.node_data);
+        self.device
+            .bind_texture(TextureSampler::ClipScrollNodes, &self.node_data_texture.texture);
+
         self.render_task_texture
             .update(&mut self.device, &mut frame.render_tasks.task_data);
-
-        self.device
-            .bind_texture(TextureSampler::Layers, &self.layer_texture.texture);
         self.device.bind_texture(
             TextureSampler::RenderTasks,
             &self.render_task_texture.texture,
         );
 
         debug_assert!(self.texture_resolver.cache_a8_texture.is_none());
         debug_assert!(self.texture_resolver.cache_rgba8_texture.is_none());
     }
@@ -3664,17 +3716,17 @@ impl Renderer {
     // De-initialize the Renderer safely, assuming the GL is still alive and active.
     pub fn deinit(mut self) {
         //Note: this is a fake frame, only needed because texture deletion is require to happen inside a frame
         self.device.begin_frame(1.0);
         self.gpu_cache_texture.deinit(&mut self.device);
         if let Some(dither_matrix_texture) = self.dither_matrix_texture {
             self.device.delete_texture(dither_matrix_texture);
         }
-        self.layer_texture.deinit(&mut self.device);
+        self.node_data_texture.deinit(&mut self.device);
         self.render_task_texture.deinit(&mut self.device);
         for texture in self.alpha_render_targets {
             self.device.delete_texture(texture);
         }
         for texture in self.color_render_targets {
             self.device.delete_texture(texture);
         }
         self.device.delete_pbo(self.texture_cache_upload_pbo);
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -13,17 +13,18 @@ use internal_types::{CacheTextureId, Ren
 use internal_types::{SourceTexture, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use resource_cache::CacheItem;
 use std::cmp;
 use std::mem;
 
 // The fixed number of layers for the shared texture cache.
 // There is one array texture per image format, allocated lazily.
-const TEXTURE_ARRAY_LAYERS: i32 = 4;
+const TEXTURE_ARRAY_LAYERS_LINEAR: usize = 4;
+const TEXTURE_ARRAY_LAYERS_NEAREST: usize = 1;
 
 // The dimensions of each layer in the texture cache.
 const TEXTURE_LAYER_DIMENSIONS: u32 = 2048;
 
 // The size of each region (page) in a texture layer.
 const TEXTURE_REGION_DIMENSIONS: u32 = 512;
 
 // Maintains a simple freelist of texture IDs that are mapped
@@ -86,36 +87,39 @@ struct CacheEntry {
     // Arbitrary user data associated with this item.
     user_data: [f32; 3],
     // The last frame this item was requested for rendering.
     last_access: FrameId,
     // Handle to the resource rect in the GPU cache.
     uv_rect_handle: GpuCacheHandle,
     // Image format of the item.
     format: ImageFormat,
+    filter: TextureFilter,
     // The actual device texture ID this is part of.
     texture_id: CacheTextureId,
 }
 
 impl CacheEntry {
     // Create a new entry for a standalone texture.
     fn new_standalone(
         texture_id: CacheTextureId,
         size: DeviceUintSize,
         format: ImageFormat,
+        filter: TextureFilter,
         user_data: [f32; 3],
         last_access: FrameId,
     ) -> CacheEntry {
         CacheEntry {
             size,
             user_data,
             last_access,
             kind: EntryKind::Standalone,
             texture_id,
             format,
+            filter,
             uv_rect_handle: GpuCacheHandle::new(),
         }
     }
 
     // Update the GPU cache for this texture cache entry.
     // This ensures that the UV rect, and texture layer index
     // are up to date in the GPU cache for vertex shaders
     // to fetch from.
@@ -159,20 +163,21 @@ impl TextureCacheHandle {
     }
 }
 
 pub struct TextureCache {
     // A lazily allocated, fixed size, texture array for
     // each format the texture cache supports.
     // TODO(gw): Do we actually need RG8 and RGB8 or
     // are they only used by external textures?
-    array_a8: TextureArray,
-    array_rgba8: TextureArray,
-    array_rg8: TextureArray,
-    array_rgb8: TextureArray,
+    array_rgba8_nearest: TextureArray,
+    array_a8_linear: TextureArray,
+    array_rgba8_linear: TextureArray,
+    array_rg8_linear: TextureArray,
+    array_rgb8_linear: TextureArray,
 
     // Maximum texture size supported by hardware.
     max_texture_size: u32,
 
     // A list of texture IDs that represent native
     // texture handles. This indirection allows the texture
     // cache to create / destroy / reuse texture handles
     // without knowing anything about the device code.
@@ -199,44 +204,67 @@ pub struct TextureCache {
     // for evicting old cache items.
     shared_entry_handles: Vec<FreeListHandle<CacheEntry>>,
 }
 
 impl TextureCache {
     pub fn new(max_texture_size: u32) -> TextureCache {
         TextureCache {
             max_texture_size,
-            array_a8: TextureArray::new(ImageFormat::A8),
-            array_rgba8: TextureArray::new(ImageFormat::BGRA8),
-            array_rg8: TextureArray::new(ImageFormat::RG8),
-            array_rgb8: TextureArray::new(ImageFormat::RGB8),
+            array_a8_linear: TextureArray::new(
+                ImageFormat::A8,
+                TextureFilter::Linear,
+                TEXTURE_ARRAY_LAYERS_LINEAR,
+            ),
+            array_rgba8_linear: TextureArray::new(
+                ImageFormat::BGRA8,
+                TextureFilter::Linear,
+                TEXTURE_ARRAY_LAYERS_LINEAR,
+            ),
+            array_rg8_linear: TextureArray::new(
+                ImageFormat::RG8,
+                TextureFilter::Linear,
+                TEXTURE_ARRAY_LAYERS_LINEAR,
+            ),
+            array_rgb8_linear: TextureArray::new(
+                ImageFormat::RGB8,
+                TextureFilter::Linear,
+                TEXTURE_ARRAY_LAYERS_LINEAR,
+            ),
+            array_rgba8_nearest: TextureArray::new(
+                ImageFormat::BGRA8,
+                TextureFilter::Nearest,
+                TEXTURE_ARRAY_LAYERS_NEAREST
+            ),
             cache_textures: CacheTextureIdList::new(),
             pending_updates: TextureUpdateList::new(),
             frame_id: FrameId(0),
             entries: FreeList::new(),
             standalone_entry_handles: Vec::new(),
             shared_entry_handles: Vec::new(),
         }
     }
 
     pub fn begin_frame(&mut self, frame_id: FrameId) {
         self.frame_id = frame_id;
     }
 
     pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) {
         self.expire_old_standalone_entries();
 
-        self.array_a8
-            .update_profile(&mut texture_cache_profile.pages_a8);
-        self.array_rg8
-            .update_profile(&mut texture_cache_profile.pages_rg8);
-        self.array_rgb8
-            .update_profile(&mut texture_cache_profile.pages_rgb8);
-        self.array_rgba8
-            .update_profile(&mut texture_cache_profile.pages_rgba8);
+        self.array_a8_linear
+            .update_profile(&mut texture_cache_profile.pages_a8_linear);
+        self.array_rg8_linear
+            .update_profile(&mut texture_cache_profile.pages_rg8_linear);
+        self.array_rgb8_linear
+            .update_profile(&mut texture_cache_profile.pages_rgb8_linear);
+        self.array_rgba8_linear
+            .update_profile(&mut texture_cache_profile.pages_rgba8_linear);
+        self.array_rgba8_nearest
+            .update_profile(&mut texture_cache_profile.pages_rgba8_nearest);
     }
 
     // Request an item in the texture cache. All images that will
     // be used on a frame *must* have request() called on their
     // handle, to update the last used timestamp and ensure
     // that resources are not flushed from the cache too early.
     //
     // Returns true if the image needs to be uploaded to the
@@ -344,23 +372,32 @@ impl TextureCache {
             entry.texture_id,
             layer_index as i32,
             dirty_rect,
         );
         self.pending_updates.push(op);
     }
 
     // Get a specific region by index from a shared texture array.
-    fn get_region_mut(&mut self, format: ImageFormat, region_index: u16) -> &mut TextureRegion {
-        let texture_array = match format {
-            ImageFormat::A8 => &mut self.array_a8,
-            ImageFormat::BGRA8 => &mut self.array_rgba8,
-            ImageFormat::RGB8 => &mut self.array_rgb8,
-            ImageFormat::RG8 => &mut self.array_rg8,
-            ImageFormat::Invalid | ImageFormat::RGBAF32 => unreachable!(),
+    fn get_region_mut(&mut self,
+        format: ImageFormat,
+        filter: TextureFilter,
+        region_index: u16
+    ) -> &mut TextureRegion {
+        let texture_array = match (format, filter) {
+            (ImageFormat::A8, TextureFilter::Linear) => &mut self.array_a8_linear,
+            (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
+            (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
+            (ImageFormat::RGB8, TextureFilter::Linear) => &mut self.array_rgb8_linear,
+            (ImageFormat::RG8, TextureFilter::Linear) => &mut self.array_rg8_linear,
+            (ImageFormat::Invalid, _) |
+            (ImageFormat::RGBAF32, _) |
+            (ImageFormat::A8, TextureFilter::Nearest) |
+            (ImageFormat::RG8, TextureFilter::Nearest) |
+            (ImageFormat::RGB8, TextureFilter::Nearest) => unreachable!(),
         };
 
         &mut texture_array.regions[region_index as usize]
     }
 
     // Retrieve the details of an item in the cache. This is used
     // during batch creation to provide the resource rect address
     // to the shaders and texture ID to the batching logic.
@@ -496,50 +533,60 @@ impl TextureCache {
                 None
             }
             EntryKind::Cache {
                 origin,
                 region_index,
                 ..
             } => {
                 // Free the block in the given region.
-                let region = self.get_region_mut(entry.format, region_index);
+                let region = self.get_region_mut(
+                    entry.format,
+                    entry.filter,
+                    region_index
+                );
                 region.free(origin);
                 Some(region)
             }
         }
     }
 
     // Attempt to allocate a block from the shared cache.
     fn allocate_from_shared_cache(
         &mut self,
         descriptor: &ImageDescriptor,
+        filter: TextureFilter,
         user_data: [f32; 3],
     ) -> Option<CacheEntry> {
         // Work out which cache it goes in, based on format.
-        let texture_array = match descriptor.format {
-            ImageFormat::A8 => &mut self.array_a8,
-            ImageFormat::BGRA8 => &mut self.array_rgba8,
-            ImageFormat::RGB8 => &mut self.array_rgb8,
-            ImageFormat::RG8 => &mut self.array_rg8,
-            ImageFormat::Invalid | ImageFormat::RGBAF32 => unreachable!(),
+        let texture_array = match (descriptor.format, filter) {
+            (ImageFormat::A8, TextureFilter::Linear) => &mut self.array_a8_linear,
+            (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
+            (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
+            (ImageFormat::RGB8, TextureFilter::Linear) => &mut self.array_rgb8_linear,
+            (ImageFormat::RG8, TextureFilter::Linear) => &mut self.array_rg8_linear,
+            (ImageFormat::Invalid, _) |
+            (ImageFormat::RGBAF32, _) |
+            (ImageFormat::A8, TextureFilter::Nearest) |
+            (ImageFormat::RG8, TextureFilter::Nearest) |
+            (ImageFormat::RGB8, TextureFilter::Nearest) => unreachable!(),
         };
 
         // Lazy initialize this texture array if required.
         if texture_array.texture_id.is_none() {
             let texture_id = self.cache_textures.allocate();
 
             let update_op = TextureUpdate {
                 id: texture_id,
                 op: TextureUpdateOp::Create {
                     width: TEXTURE_LAYER_DIMENSIONS,
                     height: TEXTURE_LAYER_DIMENSIONS,
                     format: descriptor.format,
-                    filter: TextureFilter::Linear,
-                    layer_count: TEXTURE_ARRAY_LAYERS,
+                    filter: texture_array.filter,
+                    layer_count: texture_array.layer_count as i32,
                     mode: RenderTargetMode::RenderTarget, // todo: !!!! remove me!?
                 },
             };
             self.pending_updates.push(update_op);
 
             texture_array.texture_id = Some(texture_id);
         }
 
@@ -567,43 +614,49 @@ impl TextureCache {
 
         // Work out if this image qualifies to go in the shared (batching) cache.
         let mut allowed_in_shared_cache = true;
         let mut allocated_in_shared_cache = true;
         let mut new_cache_entry = None;
         let size = DeviceUintSize::new(descriptor.width, descriptor.height);
         let frame_id = self.frame_id;
 
-        // TODO(gw): For now, anything that requests nearest filtering
+        // TODO(gw): For now, anything that requests nearest filtering and isn't BGRA8
         //           just fails to allocate in a texture page, and gets a standalone
-        //           texture. This isn't ideal, as it causes lots of batch breaks,
-        //           but is probably rare enough that it can be fixed up later (it's also
-        //           fairly trivial to implement, just tedious).
-        if filter == TextureFilter::Nearest {
+        //           texture. This is probably rare enough that it can be fixed up later.
+        if filter == TextureFilter::Nearest && descriptor.format != ImageFormat::BGRA8 {
             allowed_in_shared_cache = false;
         }
 
         // Anything larger than 512 goes in a standalone texture.
         // TODO(gw): If we find pages that suffer from batch breaks in this
         //           case, add support for storing these in a standalone
         //           texture array.
         if descriptor.width > 512 || descriptor.height > 512 {
             allowed_in_shared_cache = false;
         }
 
         // If it's allowed in the cache, see if there is a spot for it.
         if allowed_in_shared_cache {
-            new_cache_entry = self.allocate_from_shared_cache(&descriptor, user_data);
+            new_cache_entry = self.allocate_from_shared_cache(
+                &descriptor,
+                filter,
+                user_data
+            );
 
             // If we failed to allocate in the shared cache, run an
             // eviction cycle, and then try to allocate again.
             if new_cache_entry.is_none() {
                 self.expire_old_shared_entries(&descriptor);
 
-                new_cache_entry = self.allocate_from_shared_cache(&descriptor, user_data);
+                new_cache_entry = self.allocate_from_shared_cache(
+                    &descriptor,
+                    filter,
+                    user_data
+                );
             }
         }
 
         // If not allowed in the cache, or if the shared cache is full, then it
         // will just have to be in a unique texture. This hurts batching but should
         // only occur on a small number of images (or pathological test cases!).
         if new_cache_entry.is_none() {
             let texture_id = self.cache_textures.allocate();
@@ -622,16 +675,17 @@ impl TextureCache {
                 },
             };
             self.pending_updates.push(update_op);
 
             new_cache_entry = Some(CacheEntry::new_standalone(
                 texture_id,
                 size,
                 descriptor.format,
+                filter,
                 user_data,
                 frame_id,
             ));
 
             allocated_in_shared_cache = false;
         }
 
         let new_cache_entry = new_cache_entry.expect("BUG: must have allocated by now");
@@ -819,37 +873,45 @@ impl TextureRegion {
         }
     }
 }
 
 // A texture array contains a number of texture layers, where
 // each layer contains one or more regions that can act
 // as slab allocators.
 struct TextureArray {
+    filter: TextureFilter,
+    layer_count: usize,
     format: ImageFormat,
     is_allocated: bool,
     regions: Vec<TextureRegion>,
     texture_id: Option<CacheTextureId>,
 }
 
 impl TextureArray {
-    fn new(format: ImageFormat) -> TextureArray {
+    fn new(
+        format: ImageFormat,
+        filter: TextureFilter,
+        layer_count: usize
+    ) -> TextureArray {
         TextureArray {
             format,
+            filter,
+            layer_count,
             is_allocated: false,
             regions: Vec::new(),
             texture_id: None,
         }
     }
 
     fn update_profile(&self, counter: &mut ResourceProfileCounter) {
         if self.is_allocated {
-            let size = TEXTURE_ARRAY_LAYERS as u32 * TEXTURE_LAYER_DIMENSIONS *
+            let size = self.layer_count as u32 * TEXTURE_LAYER_DIMENSIONS *
                 TEXTURE_LAYER_DIMENSIONS * self.format.bytes_per_pixel();
-            counter.set(TEXTURE_ARRAY_LAYERS as usize, size as usize);
+            counter.set(self.layer_count as usize, size as usize);
         } else {
             counter.set(0, 0);
         }
     }
 
     // Allocate space in this texture array.
     fn alloc(
         &mut self,
@@ -859,25 +921,28 @@ impl TextureArray {
         frame_id: FrameId,
     ) -> Option<CacheEntry> {
         // Lazily allocate the regions if not already created.
         // This means that very rarely used image formats can be
         // added but won't allocate a cache if never used.
         if !self.is_allocated {
             debug_assert!(TEXTURE_LAYER_DIMENSIONS % TEXTURE_REGION_DIMENSIONS == 0);
             let regions_per_axis = TEXTURE_LAYER_DIMENSIONS / TEXTURE_REGION_DIMENSIONS;
-            for layer_index in 0 .. TEXTURE_ARRAY_LAYERS {
+            for layer_index in 0 .. self.layer_count {
                 for y in 0 .. regions_per_axis {
                     for x in 0 .. regions_per_axis {
                         let origin = DeviceUintPoint::new(
                             x * TEXTURE_REGION_DIMENSIONS,
                             y * TEXTURE_REGION_DIMENSIONS,
                         );
-                        let region =
-                            TextureRegion::new(TEXTURE_REGION_DIMENSIONS, layer_index, origin);
+                        let region = TextureRegion::new(
+                            TEXTURE_REGION_DIMENSIONS,
+                            layer_index as i32,
+                            origin
+                        );
                         self.regions.push(region);
                     }
                 }
             }
             self.is_allocated = true;
         }
 
         // Quantize the size of the allocation to select a region to
@@ -931,16 +996,17 @@ impl TextureArray {
         entry_kind.map(|kind| {
             CacheEntry {
                 size: DeviceUintSize::new(width, height),
                 user_data,
                 last_access: frame_id,
                 kind,
                 uv_rect_handle: GpuCacheHandle::new(),
                 format: self.format,
+                filter: self.filter,
                 texture_id: self.texture_id.unwrap(),
             }
         })
     }
 }
 
 impl TextureUpdate {
     // Constructs a TextureUpdate operation to be passed to the
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,42 +1,41 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
-use api::{LayerToWorldTransform, MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
-use api::{LayerVector2D, TileOffset, WorldToLayerTransform, YuvColorSpace, YuvFormat};
+use api::{MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
+use api::{LayerVector2D, TileOffset, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use device::Texture;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
-use gpu_types::{BRUSH_FLAG_USES_PICTURE};
+use gpu_types::{BRUSH_FLAG_USES_PICTURE, ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
 use picture::PictureKind;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, RectangleContent};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
 use texture_allocator::GuillotineAllocator;
-use util::{MatrixHelpers, TransformedRect, TransformedRectKind};
-use euclid::rect;
+use util::{MatrixHelpers, TransformedRectKind};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
 const MIN_TARGET_SIZE: u32 = 2048;
 
 trait AlphaBatchHelpers {
     fn get_blend_mode(
@@ -54,21 +53,25 @@ impl AlphaBatchHelpers for PrimitiveStor
     ) -> BlendMode {
         let needs_blending = !metadata.opacity.is_opaque || metadata.clip_task_id.is_some() ||
             transform_kind == TransformedRectKind::Complex;
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let font = &self.cpu_text_runs[metadata.cpu_prim_index.0].font;
                 match font.render_mode {
-                    FontRenderMode::Subpixel => if font.bg_color.a != 0 {
-                        BlendMode::SubpixelWithBgColor
-                    } else {
-                        BlendMode::Subpixel
-                    },
+                    FontRenderMode::Subpixel => {
+                        if font.bg_color.a != 0 {
+                            BlendMode::SubpixelWithBgColor
+                        } else if font.color.a != 255 || metadata.clip_task_id.is_some() {
+                            BlendMode::SubpixelWithAlpha
+                        } else {
+                            BlendMode::SubpixelOpaque(font.color)
+                        }
+                    }
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
                     FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
             },
             PrimitiveKind::Rectangle => {
                 let rectangle_cpu = &self.cpu_rectangles[metadata.cpu_prim_index.0];
                 match rectangle_cpu.content {
@@ -152,24 +155,25 @@ impl AlphaBatchList {
 
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         let mut selected_batch_index = None;
 
-        match key.kind {
-            BatchKind::Composite { .. } => {
+        match (key.kind, key.blend_mode) {
+            (BatchKind::Composite { .. }, _) => {
                 // Composites always get added to their own batch.
                 // This is because the result of a composite can affect
                 // the input to the next composite. Perhaps we can
                 // optimize this in the future.
             }
-            BatchKind::Transformable(_, TransformBatchKind::TextRun(_)) => {
+            (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelWithBgColor) |
+            (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelWithAlpha) => {
                 'outer_text: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
                     // Subpixel text is drawn in two passes. Because of this, we need
                     // to check for overlaps with every batch (which is a bit different
                     // than the normal batching below).
                     for item_rect in &batch.item_rects {
                         if item_rect.intersects(item_bounding_rect) {
                             break 'outer_text;
                         }
@@ -276,17 +280,19 @@ impl BatchList {
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         match key.blend_mode {
             BlendMode::None => self.opaque_batch_list.get_suitable_batch(key),
             BlendMode::Alpha | BlendMode::PremultipliedAlpha |
-            BlendMode::PremultipliedDestOut | BlendMode::Subpixel |
+            BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelOpaque(..) |
+            BlendMode::SubpixelWithAlpha |
             BlendMode::SubpixelWithBgColor => {
                 self.alpha_batch_list
                     .get_suitable_batch(key, item_bounding_rect)
             }
         }
     }
 
     fn finalize(&mut self) {
@@ -341,43 +347,53 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(
                     task_address,
                     src_task_address,
                     RenderTaskAddress(0),
                     filter_mode,
                     amount,
                     z,
+                    0,
+                    0,
                 );
 
                 batch.push(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::HardwareComposite(
                 stacking_context_index,
                 src_id,
                 composite_op,
                 screen_origin,
                 z,
+                dest_rect,
             ) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let src_task_address = render_tasks.get_task_address(src_id);
                 let key = BatchKey::new(
                     BatchKind::HardwareComposite,
                     composite_op.to_blend_mode(),
                     BatchTextures::no_texture(),
                 );
                 let batch = batch_list.get_suitable_batch(key, &stacking_context.screen_bounds);
+                let dest_rect = if dest_rect.width > 0 && dest_rect.height > 0 {
+                    dest_rect
+                } else {
+                    render_tasks.get(src_id).get_dynamic_size()
+                };
 
                 let instance = CompositePrimitiveInstance::new(
                     task_address,
                     src_task_address,
                     RenderTaskAddress(0),
                     screen_origin.x,
                     screen_origin.y,
                     z,
+                    dest_rect.width,
+                    dest_rect.height,
                 );
 
                 batch.push(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Composite(stacking_context_index, source_id, backdrop_id, mode, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let key = BatchKey::new(
                     BatchKind::Composite {
@@ -394,41 +410,41 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(
                     task_address,
                     source_task_address,
                     backdrop_task_address,
                     mode as u32 as i32,
                     0,
                     z,
+                    0,
+                    0,
                 );
 
                 batch.push(PrimitiveInstance::from(instance));
             }
-            AlphaRenderItem::Primitive(clip_scroll_group_index_opt, prim_index, z) => {
+            AlphaRenderItem::Primitive(clip_id, scroll_id, prim_index, z) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
-                let (transform_kind, packed_layer_index) = match clip_scroll_group_index_opt {
-                    Some(group_index) => {
-                        let group = &ctx.clip_scroll_group_store[group_index.0];
-                        let bounding_rect = group.screen_bounding_rect.as_ref().unwrap();
-                        (bounding_rect.0, group.packed_layer_index)
-                    }
-                    None => (TransformedRectKind::AxisAligned, PackedLayerIndex(0)),
-                };
+                let scroll_node = &ctx.node_data[scroll_id.0 as usize];
+                // TODO(gw): Calculating this for every primitive is a bit
+                //           wasteful. We should probably cache this in
+                //           the scroll node...
+                let transform_kind = scroll_node.transform.transform_kind();
                 let item_bounding_rect = prim_metadata.screen_rect.as_ref().unwrap();
                 let prim_cache_address = gpu_cache.get_address(&prim_metadata.gpu_location);
                 let no_textures = BatchTextures::no_texture();
                 let clip_task_address = prim_metadata
                     .clip_task_id
                     .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
                 let base_instance = SimplePrimitiveInstance::new(
                     prim_cache_address,
                     task_address,
                     clip_task_address,
-                    packed_layer_index.into(),
+                    clip_id,
+                    scroll_id,
                     z,
                 );
 
                 let blend_mode = ctx.prim_store.get_blend_mode(prim_metadata, transform_kind);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Brush => {
                         panic!("BUG: brush type not expected in an alpha task (yet)");
@@ -626,17 +642,18 @@ impl AlphaRenderItem {
                                         BrushImageKind::NinePatch
                                     }
                                 }
                             }
                         };
                         let instance = BrushInstance {
                             picture_address: task_address,
                             prim_address: prim_cache_address,
-                            layer_address: packed_layer_index.into(),
+                            clip_id,
+                            scroll_id,
                             clip_task_address,
                             z,
                             flags: 0,
                             user_data0: cache_task_address.0 as i32,
                             user_data1: image_kind as i32,
                         };
                         batch.push(PrimitiveInstance::from(instance));
                     }
@@ -768,16 +785,18 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(
                     task_address,
                     source_task_address,
                     RenderTaskAddress(0),
                     gpu_address,
                     0,
                     z,
+                    0,
+                    0,
                 );
 
                 batch.push(PrimitiveInstance::from(instance));
             }
         }
     }
 }
 
@@ -859,17 +878,17 @@ impl ClipBatcher {
         gpu_cache: &GpuCache,
         geometry_kind: MaskGeometryKind,
         clip_store: &ClipStore,
     ) {
         let mut coordinate_system_id = coordinate_system_id;
         for work_item in clips.iter() {
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
-                layer_address: work_item.layer_index.into(),
+                scroll_node_id: work_item.scroll_node_id,
                 segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
             };
             let info = clip_store
                 .get_opt(&work_item.clip_sources)
                 .expect("bug: clip handle should be valid");
 
@@ -951,19 +970,19 @@ impl ClipBatcher {
             }
         }
     }
 }
 
 pub struct RenderTargetContext<'a> {
     pub device_pixel_ratio: f32,
     pub stacking_context_store: &'a [StackingContext],
-    pub clip_scroll_group_store: &'a [ClipScrollGroup],
     pub prim_store: &'a PrimitiveStore,
     pub resource_cache: &'a ResourceCache,
+    pub node_data: &'a [ClipScrollNodeData],
 }
 
 struct TextureAllocator {
     // TODO(gw): Replace this with a simpler allocator for
     // render target allocation - this use case doesn't need
     // to deal with coalescing etc that the general texture
     // cache allocator requires.
     allocator: GuillotineAllocator,
@@ -1104,26 +1123,32 @@ impl<T: RenderTarget> RenderTargetList<T
 /// Storing the task ID allows the renderer to find
 /// the target rect within the render target that this
 /// pipeline exists at.
 pub struct FrameOutput {
     pub task_id: RenderTaskId,
     pub pipeline_id: PipelineId,
 }
 
+pub struct ScalingInfo {
+    pub src_task_id: RenderTaskId,
+    pub dest_task_id: RenderTaskId,
+}
+
 /// A render target represents a number of rendering operations on a surface.
 pub struct ColorRenderTarget {
     pub alpha_batcher: AlphaBatcher,
     // List of text runs to be cached to this render target.
     pub text_run_cache_prims: FastHashMap<SourceTexture, Vec<PrimitiveInstance>>,
     pub line_cache_prims: Vec<PrimitiveInstance>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub readbacks: Vec<DeviceIntRect>,
+    pub scalings: Vec<ScalingInfo>,
     // List of frame buffer outputs for this render target.
     pub outputs: Vec<FrameOutput>,
     allocator: Option<TextureAllocator>,
     glyph_fetch_buffer: Vec<GlyphFetchResult>,
 }
 
 impl RenderTarget for ColorRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
@@ -1136,16 +1161,17 @@ impl RenderTarget for ColorRenderTarget 
     fn new(size: Option<DeviceUintSize>) -> ColorRenderTarget {
         ColorRenderTarget {
             alpha_batcher: AlphaBatcher::new(),
             text_run_cache_prims: FastHashMap::default(),
             line_cache_prims: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             readbacks: Vec::new(),
+            scalings: Vec::new(),
             allocator: size.map(|size| TextureAllocator::new(size)),
             glyph_fetch_buffer: Vec::new(),
             outputs: Vec::new(),
         }
     }
 
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator
@@ -1223,17 +1249,18 @@ impl RenderTarget for ColorRenderTarget 
 
                                 let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
                                 let sub_prim_address =
                                     gpu_cache.get_address(&sub_metadata.gpu_location);
                                 let instance = SimplePrimitiveInstance::new(
                                     sub_prim_address,
                                     task_index,
                                     RenderTaskAddress(0),
-                                    PackedLayerIndex(0).into(),
+                                    ClipScrollNodeIndex(0),
+                                    ClipScrollNodeIndex(0),
                                     0,
                                 ); // z is disabled for rendering cache primitives
 
                                 match sub_metadata.prim_kind {
                                     PrimitiveKind::TextRun => {
                                         // Add instances that reference the text run GPU location. Also supply
                                         // the parent shadow prim address as a user data field, allowing
                                         // the shader to fetch the shadow parameters.
@@ -1281,43 +1308,51 @@ impl RenderTarget for ColorRenderTarget 
                 }
             }
             RenderTaskKind::CacheMask(..) => {
                 panic!("Should not be added to color target!");
             }
             RenderTaskKind::Readback(device_rect) => {
                 self.readbacks.push(device_rect);
             }
+            RenderTaskKind::Scaling(..) => {
+                self.scalings.push(ScalingInfo {
+                    src_task_id: task.children[0],
+                    dest_task_id: task_id,
+                });
+            }
         }
     }
 }
 
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
     pub brush_mask_corners: Vec<PrimitiveInstance>,
     pub brush_mask_rounded_rects: Vec<PrimitiveInstance>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
+    pub scalings: Vec<ScalingInfo>,
     pub zero_clears: Vec<RenderTaskId>,
     allocator: TextureAllocator,
 }
 
 impl RenderTarget for AlphaRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
         self.allocator.allocate(&size)
     }
 
     fn new(size: Option<DeviceUintSize>) -> AlphaRenderTarget {
         AlphaRenderTarget {
             clip_batcher: ClipBatcher::new(),
             brush_mask_corners: Vec::new(),
             brush_mask_rounded_rects: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
+            scalings: Vec::new(),
             zero_clears: Vec::new(),
             allocator: TextureAllocator::new(size.expect("bug: alpha targets need size")),
         }
     }
 
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator.used_rect
     }
@@ -1390,17 +1425,18 @@ impl RenderTarget for AlphaRenderTarget 
                                         let instance = BrushInstance {
                                             picture_address: task_index,
                                             prim_address: sub_prim_address,
                                             // TODO(gw): In the future, when brush
                                             //           primitives on picture backed
                                             //           tasks support clip masks and
                                             //           transform primitives, these
                                             //           will need to be filled out!
-                                            layer_address: PackedLayerIndex(0).into(),
+                                            clip_id: ClipScrollNodeIndex(0),
+                                            scroll_id: ClipScrollNodeIndex(0),
                                             clip_task_address: RenderTaskAddress(0),
                                             z: 0,
                                             flags: BRUSH_FLAG_USES_PICTURE,
                                             user_data0: 0,
                                             user_data1: 0,
                                         };
                                         let brush = &ctx.prim_store.cpu_brushes[sub_metadata.cpu_prim_index.0];
                                         let batch = match brush.kind {
@@ -1433,16 +1469,22 @@ impl RenderTarget for AlphaRenderTarget 
                     &task_info.clips,
                     task_info.coordinate_system_id,
                     &ctx.resource_cache,
                     gpu_cache,
                     task_info.geometry_kind,
                     clip_store,
                 );
             }
+            RenderTaskKind::Scaling(..) => {
+                self.scalings.push(ScalingInfo {
+                    src_task_id: task.children[0],
+                    dest_task_id: task_id,
+                });
+            }
         }
     }
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
@@ -1696,19 +1738,16 @@ impl OpaquePrimitiveBatch {
         OpaquePrimitiveBatch {
             key,
             instances: Vec::new(),
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
-pub struct PackedLayerIndex(pub usize);
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct StackingContextIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub enum ContextIsolation {
     /// No isolation - the content is mixed up with everything else.
     None,
     /// Items are isolated and drawn into a separate render target.
     /// Child contexts are exposed.
@@ -1803,83 +1842,16 @@ impl StackingContext {
         }
     }
 
     pub fn can_contribute_to_scene(&self) -> bool {
         !self.composite_ops.will_make_invisible()
     }
 }
 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub struct ClipScrollGroupIndex(pub usize, pub ClipAndScrollInfo);
-
-#[derive(Debug)]
-pub struct ClipScrollGroup {
-    pub scroll_node_id: ClipId,
-    pub clip_node_id: ClipId,
-    pub packed_layer_index: PackedLayerIndex,
-    pub screen_bounding_rect: Option<(TransformedRectKind, DeviceIntRect)>,
-    pub coordinate_system_id: CoordinateSystemId,
-}
-
-impl ClipScrollGroup {
-    pub fn is_visible(&self) -> bool {
-        self.screen_bounding_rect.is_some()
-    }
-}
-
-#[derive(Debug, Clone)]
-#[repr(C)]
-pub struct PackedLayer {
-    pub transform: LayerToWorldTransform,
-    pub inv_transform: WorldToLayerTransform,
-    pub local_clip_rect: LayerRect,
-}
-
-impl PackedLayer {
-    pub fn empty() -> PackedLayer {
-        PackedLayer {
-            transform: LayerToWorldTransform::identity(),
-            inv_transform: WorldToLayerTransform::identity(),
-            local_clip_rect: LayerRect::zero(),
-        }
-    }
-
-    pub fn set_transform(&mut self, transform: LayerToWorldTransform) -> bool {
-        self.transform = transform;
-        match self.transform.inverse() {
-            Some(inv) => {
-                self.inv_transform = inv;
-                true
-            }
-            None => false,
-        }
-    }
-
-    pub fn set_rect(
-        &mut self,
-        local_rect: &LayerRect,
-        screen_rect: &DeviceIntRect,
-        device_pixel_ratio: f32,
-    ) -> Option<(TransformedRectKind, DeviceIntRect)> {
-        self.local_clip_rect = if self.transform.has_perspective_component() {
-            // Given a very large rect which means any rect would be inside this rect.
-            // That is, nothing would be clipped.
-            rect(f32::MIN / 2.0, f32::MIN / 2.0, f32::MAX, f32::MAX)
-        } else {
-            *local_rect
-        };
-        let xf_rect = TransformedRect::new(local_rect, &self.transform, device_pixel_ratio);
-        xf_rect
-            .bounding_rect
-            .intersection(screen_rect)
-            .map(|rect| (xf_rect.kind, rect))
-    }
-}
-
 #[derive(Debug, Clone, Default)]
 pub struct CompositeOps {
     // Requires only a single texture as input (e.g. most filters)
     pub filters: Vec<FilterOp>,
 
     // Requires two source textures (e.g. mix-blend-mode)
     pub mix_blend_mode: Option<MixBlendMode>,
 }
@@ -1910,18 +1882,17 @@ impl CompositeOps {
 /// and presented to the renderer.
 pub struct Frame {
     pub window_size: DeviceUintSize,
     pub background_color: Option<ColorF>,
     pub device_pixel_ratio: f32,
     pub passes: Vec<RenderPass>,
     pub profile_counters: FrameProfileCounters,
 
-    pub layer_texture_data: Vec<PackedLayer>,
-
+    pub node_data: Vec<ClipScrollNodeData>,
     pub render_tasks: RenderTaskTree,
 
     // List of updates that need to be pushed to the
     // gpu resource cache.
     pub gpu_cache_updates: Option<GpuCacheUpdateList>,
 
     // List of textures that we don't know about yet
     // from the backend thread. The render thread
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -17,16 +17,17 @@ const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst>;
     fn is_identity(&self) -> bool;
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>>;
     fn inverse_rect_footprint(&self, rect: &TypedRect<f32, Dst>) -> TypedRect<f32, Src>;
+    fn transform_kind(&self) -> TransformedRectKind;
 }
 
 impl<Src, Dst> MatrixHelpers<Src, Dst> for TypedTransform3D<f32, Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst> {
         let top_left = self.transform_point2d(&rect.origin);
         let top_right = self.transform_point2d(&rect.top_right());
         let bottom_left = self.transform_point2d(&rect.bottom_left());
         let bottom_right = self.transform_point2d(&rect.bottom_right());
@@ -93,16 +94,24 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> f
             self.inverse_project(&rect.top_right())
                 .unwrap_or(TypedPoint2D::zero()),
             self.inverse_project(&rect.bottom_left())
                 .unwrap_or(TypedPoint2D::zero()),
             self.inverse_project(&rect.bottom_right())
                 .unwrap_or(TypedPoint2D::zero()),
         ])
     }
+
+    fn transform_kind(&self) -> TransformedRectKind {
+        if self.preserves_2d_axis_alignment() {
+            TransformedRectKind::AxisAligned
+        } else {
+            TransformedRectKind::Complex
+        }
+    }
 }
 
 pub trait RectHelpers<U>
 where
     Self: Sized,
 {
     fn contains_rect(&self, other: &Self) -> bool;
     fn from_floats(x0: f32, y0: f32, x1: f32, y1: f32) -> Self;
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -150,16 +150,23 @@ pub struct StickyFrameDisplayItem {
     /// original position relative to non-sticky content within the same scrolling frame.
     pub vertical_offset_bounds: StickyOffsetBounds,
 
     /// The minimum and maximum horizontal offsets for this sticky frame. Ignoring these constraints,
     /// the sticky frame will continue to stick to the edge of the viewport as its original
     /// position is scrolled out of view. Constraints specify a maximum and minimum offset from the
     /// original position relative to non-sticky content within the same scrolling frame.
     pub horizontal_offset_bounds: StickyOffsetBounds,
+
+    /// The amount of offset that has already been applied to the sticky frame. A positive y
+    /// component of this field means that a top-sticky item was in a scrollframe that has been
+    /// scrolled down, such that the sticky item's position needed to be offset downwards by
+    /// `previously_applied_offset.y`. A negative y component corresponds to the upward offset
+    /// applied due to bottom-stickiness. The x-axis works analogously.
+    pub previously_applied_offset: LayoutVector2D,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum ScrollSensitivity {
     ScriptAndInputEvents,
     Script,
 }
 
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -1259,23 +1259,26 @@ impl DisplayListBuilder {
 
     pub fn define_sticky_frame(
         &mut self,
         id: Option<ClipId>,
         frame_rect: LayoutRect,
         margins: SideOffsets2D<Option<f32>>,
         vertical_offset_bounds: StickyOffsetBounds,
         horizontal_offset_bounds: StickyOffsetBounds,
+        previously_applied_offset: LayoutVector2D,
+
     ) -> ClipId {
         let id = self.generate_clip_id(id);
         let item = SpecificDisplayItem::StickyFrame(StickyFrameDisplayItem {
             id,
             margins,
             vertical_offset_bounds,
             horizontal_offset_bounds,
+            previously_applied_offset,
         });
 
         let info = LayoutPrimitiveInfo::new(frame_rect);
         self.push_item(item, &info);
         id
     }
 
     pub fn push_clip_id(&mut self, id: ClipId) {
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -930,16 +930,20 @@ typedef TypedRect_u32__DevicePixel Devic
  */
 
 extern void AddFontData(WrFontKey aKey,
                         const uint8_t *aData,
                         size_t aSize,
                         uint32_t aIndex,
                         const ArcVecU8 *aVec);
 
+extern void AddNativeFontHandle(WrFontKey aKey,
+                                void *aHandle,
+                                uint32_t aIndex);
+
 extern void DeleteFontData(WrFontKey aKey);
 
 extern void gecko_printf_stderr_output(const char *aMsg);
 
 extern void gfx_critical_error(const char *aMsg);
 
 extern void gfx_critical_note(const char *aMsg);
 
@@ -1417,16 +1421,23 @@ void wr_resource_updates_add_external_im
                                             WrImageKey aImageKey,
                                             const WrImageDescriptor *aDescriptor,
                                             WrExternalImageId aExternalImageId,
                                             WrExternalImageBufferType aBufferType,
                                             uint8_t aChannelIndex)
 WR_FUNC;
 
 WR_INLINE
+void wr_resource_updates_add_font_descriptor(ResourceUpdates *aResources,
+                                             WrFontKey aKey,
+                                             WrVecU8 *aBytes,
+                                             uint32_t aIndex)
+WR_FUNC;
+
+WR_INLINE
 void wr_resource_updates_add_font_instance(ResourceUpdates *aResources,
                                            WrFontInstanceKey aKey,
                                            WrFontKey aFontKey,
                                            float aGlyphSize,
                                            const FontInstanceOptions *aOptions,
                                            const FontInstancePlatformOptions *aPlatformOptions,
                                            WrVecU8 *aVariations)
 WR_FUNC;
@@ -1441,23 +1452,16 @@ WR_FUNC;
 WR_INLINE
 void wr_resource_updates_add_raw_font(ResourceUpdates *aResources,
                                       WrFontKey aKey,
                                       WrVecU8 *aBytes,
                                       uint32_t aIndex)
 WR_FUNC;
 
 WR_INLINE
-void wr_resource_updates_add_font_descriptor(ResourceUpdates *aResources,
-                                             WrFontKey aKey,
-                                             WrVecU8 *aBytes,
-                                             uint32_t aIndex)
-WR_FUNC;
-
-WR_INLINE
 void wr_resource_updates_clear(ResourceUpdates *aResources)
 WR_FUNC;
 
 WR_INLINE
 void wr_resource_updates_delete(ResourceUpdates *aUpdates)
 WR_DESTRUCTOR_SAFE_FUNC;
 
 WR_INLINE