Bug 1417062 - Update webrender to commit d490a74c438d987122c600afca6bb2247ab38637. r?nical draft
author Kartikaya Gupta <kgupta@mozilla.com>
Fri, 17 Nov 2017 08:42:23 -0500
changeset 699655 e3480415a4699fa47bd2e3638d01db2289e7ec7f
parent 699635 010374bce60670cf1348150fe493d0193318d4f6
child 699656 0973d0d5298d45ec6671bc970ba6ffa98fadd275
push id 89632
push user kgupta@mozilla.com
push date Fri, 17 Nov 2017 13:46:58 +0000
reviewers nical
bugs 1417062
milestone 59.0a1
Bug 1417062 - Update webrender to commit d490a74c438d987122c600afca6bb2247ab38637. r?nical MozReview-Commit-ID: 4i2RKAFTAMd
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/animation.rs
gfx/webrender/examples/common/boilerplate.rs
gfx/webrender/res/ps_blend.glsl
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/debug_render.rs
gfx/webrender/src/debug_server.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/query.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/scene.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_bindings/webrender_ffi_generated.h
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-8a39cf24f493e894a66c2465dd310a2b2923e558
+d490a74c438d987122c600afca6bb2247ab38637
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -6,17 +6,16 @@ license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
 debugger = ["ws", "serde_json", "serde", "serde_derive"]
-query = []
 
 [dependencies]
 app_units = "0.5.6"
 bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15.5"
 fxhash = "0.2.1"
 gleam = "0.4.8"
--- a/gfx/webrender/examples/animation.rs
+++ b/gfx/webrender/examples/animation.rs
@@ -19,17 +19,19 @@ extern crate webrender;
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
 use euclid::Radians;
 use webrender::api::*;
 
 struct App {
     property_key: PropertyBindingKey<LayoutTransform>,
+    opacity_key: PropertyBindingKey<f32>,
     transform: LayoutTransform,
+    opacity: f32,
 }
 
 impl Example for App {
     fn render(
         &mut self,
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
@@ -44,70 +46,84 @@ impl Example for App {
             radii: BorderRadius::uniform(50.0),
             mode: ClipMode::Clip,
         };
         let info = LayoutPrimitiveInfo {
             local_clip: LocalClip::RoundedRect(bounds, complex_clip),
             .. LayoutPrimitiveInfo::new(bounds)
         };
 
+        let filters = vec![
+            FilterOp::Opacity(PropertyBinding::Binding(self.opacity_key), self.opacity),
+        ];
+
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             Some(PropertyBinding::Binding(self.property_key)),
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
-            Vec::new(),
+            filters,
         );
 
         // Fill it with a white rect
         builder.push_rect(&info, ColorF::new(1.0, 1.0, 1.0, 1.0));
 
         builder.pop_stacking_context();
     }
 
     fn on_event(&mut self, event: glutin::Event, api: &RenderApi, document_id: DocumentId) -> bool {
         match event {
             glutin::Event::KeyboardInput(glutin::ElementState::Pressed, _, Some(key)) => {
-                let (offset_x, offset_y, angle) = match key {
-                    glutin::VirtualKeyCode::Down => (0.0, 10.0, 0.0),
-                    glutin::VirtualKeyCode::Up => (0.0, -10.0, 0.0),
-                    glutin::VirtualKeyCode::Right => (10.0, 0.0, 0.0),
-                    glutin::VirtualKeyCode::Left => (-10.0, 0.0, 0.0),
-                    glutin::VirtualKeyCode::Comma => (0.0, 0.0, 0.1),
-                    glutin::VirtualKeyCode::Period => (0.0, 0.0, -0.1),
+                let (offset_x, offset_y, angle, delta_opacity) = match key {
+                    glutin::VirtualKeyCode::Down => (0.0, 10.0, 0.0, 0.0),
+                    glutin::VirtualKeyCode::Up => (0.0, -10.0, 0.0, 0.0),
+                    glutin::VirtualKeyCode::Right => (10.0, 0.0, 0.0, 0.0),
+                    glutin::VirtualKeyCode::Left => (-10.0, 0.0, 0.0, 0.0),
+                    glutin::VirtualKeyCode::Comma => (0.0, 0.0, 0.1, 0.0),
+                    glutin::VirtualKeyCode::Period => (0.0, 0.0, -0.1, 0.0),
+                    glutin::VirtualKeyCode::Z => (0.0, 0.0, 0.0, -0.1),
+                    glutin::VirtualKeyCode::X => (0.0, 0.0, 0.0, 0.1),
                     _ => return false,
                 };
                 // Update the transform based on the keyboard input and push it to
                 // webrender using the generate_frame API. This will recomposite with
                 // the updated transform.
+                self.opacity += delta_opacity;
                 let new_transform = self.transform
                     .pre_rotate(0.0, 0.0, 1.0, Radians::new(angle))
                     .post_translate(LayoutVector3D::new(offset_x, offset_y, 0.0));
                 api.generate_frame(
                     document_id,
                     Some(DynamicProperties {
                         transforms: vec![
                             PropertyValue {
                                 key: self.property_key,
                                 value: new_transform,
                             },
                         ],
-                        floats: vec![],
+                        floats: vec![
+                            PropertyValue {
+                                key: self.opacity_key,
+                                value: self.opacity,
+                            }
+                        ],
                     }),
                 );
                 self.transform = new_transform;
             }
             _ => (),
         }
 
         false
     }
 }
 
 fn main() {
     let mut app = App {
         property_key: PropertyBindingKey::new(42), // arbitrary magic number
+        opacity_key: PropertyBindingKey::new(43),
         transform: LayoutTransform::create_translation(0.0, 0.0, 0.0),
+        opacity: 0.5,
     };
     boilerplate::main_wrapper(&mut app, None);
 }
--- a/gfx/webrender/examples/common/boilerplate.rs
+++ b/gfx/webrender/examples/common/boilerplate.rs
@@ -177,18 +177,17 @@ pub fn main_wrapper(example: &mut Exampl
 
         for event in window.poll_events() {
             events.push(event);
         }
 
         for event in events {
             match event {
                 glutin::Event::Closed |
-                glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Escape)) |
-                glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Q)) => break 'outer,
+                glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Escape)) => break 'outer,
 
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::P),
                 ) => {
                     let mut flags = renderer.get_debug_flags();
                     flags.toggle(webrender::DebugFlags::PROFILER_DBG);
@@ -219,16 +218,23 @@ pub fn main_wrapper(example: &mut Exampl
                 ) => {
                     let mut flags = renderer.get_debug_flags();
                     flags.toggle(webrender::DebugFlags::ALPHA_PRIM_DBG);
                     renderer.set_debug_flags(flags);
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
+                    Some(glutin::VirtualKeyCode::Q),
+                ) => {
+                    renderer.toggle_queries_enabled();
+                }
+                glutin::Event::KeyboardInput(
+                    glutin::ElementState::Pressed,
+                    _,
                     Some(glutin::VirtualKeyCode::Key1),
                 ) => {
                     api.set_window_parameters(document_id,
                         size,
                         DeviceUintRect::new(DeviceUintPoint::zero(), size),
                         1.0
                     );
                 }
--- a/gfx/webrender/res/ps_blend.glsl
+++ b/gfx/webrender/res/ps_blend.glsl
@@ -85,73 +85,62 @@ void main(void) {
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-vec4 Blur(float radius, vec2 direction) {
-    // TODO(gw): Support blur in WR2!
-    return vec4(1.0);
-}
-
 vec4 Contrast(vec4 Cs, float amount) {
-    return vec4(Cs.rgb * amount - 0.5 * amount + 0.5, 1.0);
+    return vec4(Cs.rgb * amount - 0.5 * amount + 0.5, Cs.a);
 }
 
 vec4 Invert(vec4 Cs, float amount) {
-    Cs.rgb /= Cs.a;
-
-    vec3 color = mix(Cs.rgb, vec3(1.0) - Cs.rgb, amount);
-
-    // Pre-multiply the alpha into the output value.
-    return vec4(color.rgb * Cs.a, Cs.a);
+    return vec4(mix(Cs.rgb, vec3(1.0) - Cs.rgb, amount), Cs.a);
 }
 
 vec4 Brightness(vec4 Cs, float amount) {
-    // Un-premultiply the input.
-    Cs.rgb /= Cs.a;
-
     // Apply the brightness factor.
     // Resulting color needs to be clamped to output range
     // since we are pre-multiplying alpha in the shader.
-    vec3 color = clamp(Cs.rgb * amount, vec3(0.0), vec3(1.0));
-
-    // Pre-multiply the alpha into the output value.
-    return vec4(color.rgb * Cs.a, Cs.a);
+    return vec4(clamp(Cs.rgb * amount, vec3(0.0), vec3(1.0)), Cs.a);
 }
 
 vec4 Opacity(vec4 Cs, float amount) {
-    return Cs * amount;
+    return vec4(Cs.rgb, Cs.a * amount);
 }
 
 void main(void) {
     vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
     vec4 Cs = textureLod(sCacheRGBA8, vec3(uv, vUv.z), 0.0);
 
     if (Cs.a == 0.0) {
         discard;
     }
 
+    // Un-premultiply the input.
+    Cs.rgb /= Cs.a;
+
     switch (vOp) {
         case 0:
-            // Gaussian blur is specially handled:
-            oFragColor = Cs;// Blur(vAmount, vec2(0,0));
+            oFragColor = Cs;
             break;
         case 1:
             oFragColor = Contrast(Cs, vAmount);
             break;
         case 4:
             oFragColor = Invert(Cs, vAmount);
             break;
         case 7:
             oFragColor = Brightness(Cs, vAmount);
             break;
         case 8:
             oFragColor = Opacity(Cs, vAmount);
             break;
         default:
             oFragColor = vColorMat * Cs;
     }
+
+    // Pre-multiply the alpha into the output value.
+    oFragColor.rgb *= oFragColor.a;
 }
 #endif
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -1,15 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
 use api::{BorderRadius, BoxShadowClipMode, LayoutSize, LayerPrimitiveInfo};
-use api::{ClipMode, ComplexClipRegion, EdgeAaSegmentMask, LocalClip, ClipAndScrollInfo};
+use api::{ClipMode, ClipAndScrollInfo, ComplexClipRegion, EdgeAaSegmentMask, LocalClip};
+use api::{PipelineId};
 use clip::ClipSource;
 use frame_builder::FrameBuilder;
 use prim_store::{PrimitiveContainer, RectangleContent, RectanglePrimitive};
 use prim_store::{BrushMaskKind, BrushKind, BrushPrimitive};
 use picture::PicturePrimitive;
 use util::RectHelpers;
 use render_task::MAX_BLUR_STD_DEVIATION;
 
@@ -19,16 +20,17 @@ pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 // The amount of padding added to the border corner drawn in the box shadow
 // mask. This ensures that we get a few pixels past the corner that can be
 // blurred without being affected by the border radius.
 pub const MASK_CORNER_PADDING: f32 = 4.0;
 
 impl FrameBuilder {
     pub fn add_box_shadow(
         &mut self,
+        pipeline_id: PipelineId,
         clip_and_scroll: ClipAndScrollInfo,
         prim_info: &LayerPrimitiveInfo,
         box_offset: &LayerVector2D,
         color: &ColorF,
         blur_radius: f32,
         spread_radius: f32,
         border_radius: BorderRadius,
         clip_mode: BoxShadowClipMode,
@@ -46,18 +48,18 @@ impl FrameBuilder {
             }
         };
 
         let shadow_radius = adjust_border_radius_for_box_shadow(
             border_radius,
             spread_amount,
         );
         let shadow_rect = prim_info.rect
-                                   .translate(box_offset)
-                                   .inflate(spread_amount, spread_amount);
+            .translate(box_offset)
+            .inflate(spread_amount, spread_amount);
 
         if blur_radius == 0.0 {
             let mut clips = Vec::new();
 
             let fast_info = match clip_mode {
                 BoxShadowClipMode::Outset => {
                     // TODO(gw): Add a fast path for ClipOut + zero border radius!
                     clips.push(ClipSource::RoundedRectangle(
@@ -180,23 +182,22 @@ impl FrameBuilder {
                     // Create a box shadow picture and add the mask primitive to it.
                     let pic_rect = shadow_rect.inflate(blur_offset, blur_offset);
                     let mut pic_prim = PicturePrimitive::new_box_shadow(
                         blur_radius,
                         *color,
                         Vec::new(),
                         clip_mode,
                         radii_kind,
+                        pipeline_id,
                     );
                     pic_prim.add_primitive(
                         brush_prim_index,
-                        &brush_rect,
                         clip_and_scroll
                     );
-                    pic_prim.build();
 
                     // TODO(gw): Right now, we always use a clip out
                     //           mask for outset shadows. We can make this
                     //           much more efficient when we have proper
                     //           segment logic, by avoiding drawing
                     //           most of the pixels inside and just
                     //           clipping out along the edges.
                     extra_clips.push(ClipSource::RoundedRectangle(
@@ -259,23 +260,22 @@ impl FrameBuilder {
                     // the brush primitive to it.
                     let mut pic_prim = PicturePrimitive::new_box_shadow(
                         blur_radius,
                         *color,
                         Vec::new(),
                         BoxShadowClipMode::Inset,
                         // TODO(gw): Make use of optimization for inset.
                         BorderRadiusKind::NonUniform,
+                        pipeline_id,
                     );
                     pic_prim.add_primitive(
                         brush_prim_index,
-                        &brush_rect,
                         clip_and_scroll
                     );
-                    pic_prim.build();
 
                     // Draw the picture one pixel outside the original
                     // rect to account for the inflate above. This
                     // extra edge will be clipped by the local clip
                     // rect set below.
                     let pic_rect = prim_info.rect.inflate(inflate_size, inflate_size);
                     let pic_info = LayerPrimitiveInfo::with_clip_rect(
                         pic_rect,
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -8,17 +8,17 @@ use api::{LayerToWorldTransform, LayoutP
 use border::BorderCornerClipSource;
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use prim_store::{ClipData, ImageMaskData};
 use resource_cache::ResourceCache;
 use util::{extract_inner_rect_safe, TransformedRect};
 
-const MAX_CLIP: f32 = 1000000.0;
+pub const MAX_CLIP: f32 = 1000000.0;
 
 pub type ClipStore = FreeList<ClipSources>;
 pub type ClipSourcesHandle = FreeListHandle<ClipSources>;
 pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipSources>;
 
 #[derive(Clone, Debug)]
 pub struct ClipRegion {
     pub main: LayerRect,
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -1,24 +1,25 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntRect, LayerPixel, LayerPoint, LayerRect, LayerSize};
 use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, LayoutVector2D, PipelineId};
 use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity};
-use api::{StickyOffsetBounds, WorldPoint};
+use api::{LayoutTransform, PropertyBinding, StickyOffsetBounds, WorldPoint};
 use clip::{ClipSourcesHandle, ClipStore};
 use clip_scroll_tree::{CoordinateSystemId, TransformUpdateState};
 use euclid::SideOffsets2D;
 use geometry::ray_intersects_rect;
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use render_task::{ClipChain, ClipChainNode, ClipWorkItem};
 use resource_cache::ResourceCache;
+use scene::SceneProperties;
 use spring::{DAMPING, STIFFNESS, Spring};
 use std::rc::Rc;
 use util::{MatrixHelpers, MaxRect};
 
 #[cfg(target_os = "macos")]
 const CAN_OVERSCROLL: bool = true;
 
 #[cfg(not(target_os = "macos"))]
@@ -180,22 +181,26 @@ impl ClipScrollNode {
         clip_rect: LayerRect,
     ) -> Self {
         Self::new(pipeline_id, Some(parent_id), &clip_rect, NodeType::Clip(handle))
     }
 
     pub fn new_reference_frame(
         parent_id: Option<ClipId>,
         frame_rect: &LayerRect,
-        transform: &LayerToScrollTransform,
+        source_transform: Option<PropertyBinding<LayoutTransform>>,
+        source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayerVector2D,
         pipeline_id: PipelineId,
     ) -> Self {
+        let identity = LayoutTransform::identity();
         let info = ReferenceFrameInfo {
-            transform: *transform,
+            resolved_transform: LayerToScrollTransform::identity(),
+            source_transform: source_transform.unwrap_or(PropertyBinding::Value(identity)),
+            source_perspective: source_perspective.unwrap_or(identity),
             origin_in_parent_reference_frame,
         };
         Self::new(pipeline_id, parent_id, frame_rect, NodeType::ReferenceFrame(info))
     }
 
     pub fn new_sticky_frame(
         parent_id: ClipId,
         frame_rect: LayerRect,
@@ -268,22 +273,23 @@ impl ClipScrollNode {
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
         node_data: &mut Vec<ClipScrollNodeData>,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
+        scene_properties: &SceneProperties,
     ) {
         // We set this earlier so that we can use it before we have all the data necessary
         // to populate the ClipScrollNodeData.
         self.node_data_index = ClipScrollNodeIndex(node_data.len() as u32);
 
-        self.update_transform(state);
+        self.update_transform(state, scene_properties);
         self.update_clip_work_item(
             state,
             device_pixel_ratio,
             clip_store,
             resource_cache,
             gpu_cache,
         );
 
@@ -304,16 +310,17 @@ impl ClipScrollNode {
                     local_clip_rect,
                     reference_frame_relative_scroll_offset:
                         self.reference_frame_relative_scroll_offset,
                     scroll_offset: self.scroll_offset(),
                 }
             }
             None => {
                 state.combined_outer_clip_bounds = DeviceIntRect::zero();
+                self.combined_clip_outer_bounds = DeviceIntRect::zero();
                 ClipScrollNodeData::invalid()
             }
         };
 
         // Write the data that will be made available to the GPU for this node.
         node_data.push(data);
     }
 
@@ -360,30 +367,43 @@ impl ClipScrollNode {
             },
             prev: current_clip_chain,
         }));
 
         state.combined_outer_clip_bounds = self.combined_clip_outer_bounds;
         state.parent_clip_chain = self.clip_chain_node.clone();
     }
 
-    pub fn update_transform(&mut self, state: &mut TransformUpdateState) {
+    pub fn update_transform(
+        &mut self,
+        state: &mut TransformUpdateState,
+        scene_properties: &SceneProperties,
+    ) {
         // We calculate this here to avoid a double-borrow later.
         let sticky_offset = self.calculate_sticky_offset(
             &state.nearest_scrolling_ancestor_offset,
             &state.nearest_scrolling_ancestor_viewport,
         );
 
         let (local_transform, accumulated_scroll_offset) = match self.node_type {
-            NodeType::ReferenceFrame(ref info) => {
-                self.combined_local_viewport_rect = info.transform
+            NodeType::ReferenceFrame(ref mut info) => {
+                // Resolve the transform against any property bindings.
+                let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
+                info.resolved_transform = LayerToScrollTransform::create_translation(
+                    info.origin_in_parent_reference_frame.x,
+                    info.origin_in_parent_reference_frame.y,
+                    0.0
+                ).pre_mul(&source_transform)
+                 .pre_mul(&info.source_perspective);
+
+                self.combined_local_viewport_rect = info.resolved_transform
                     .with_destination::<LayerPixel>()
                     .inverse_rect_footprint(&state.parent_combined_viewport_rect);
                 self.reference_frame_relative_scroll_offset = LayerVector2D::zero();
-                (info.transform, state.parent_accumulated_scroll_offset)
+                (info.resolved_transform, state.parent_accumulated_scroll_offset)
             }
             NodeType::Clip(_) | NodeType::ScrollFrame(_) => {
                 // Move the parent's viewport into the local space (of the node origin)
                 // and intersect with the local clip rectangle to get the local viewport.
                 self.combined_local_viewport_rect =
                     state.parent_combined_viewport_rect
                     .intersection(&self.local_clip_rect)
                     .unwrap_or(LayerRect::zero());
@@ -431,17 +451,17 @@ impl ClipScrollNode {
             NodeType::ReferenceFrame(ref info) => {
                 state.parent_reference_frame_transform = self.world_viewport_transform;
                 state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
                 state.parent_accumulated_scroll_offset = LayerVector2D::zero();
                 state.nearest_scrolling_ancestor_viewport =
                     state.nearest_scrolling_ancestor_viewport
                        .translate(&info.origin_in_parent_reference_frame);
 
-                if !info.transform.preserves_2d_axis_alignment() {
+                if !info.resolved_transform.preserves_2d_axis_alignment() {
                     state.current_coordinate_system_id = state.next_coordinate_system_id;
                     state.next_coordinate_system_id = state.next_coordinate_system_id.next();
                 }
             },
             NodeType::Clip(..) => {
                 state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
             },
             NodeType::ScrollFrame(ref scrolling) => {
@@ -782,15 +802,22 @@ impl ScrollingState {
     }
 }
 
 /// Contains information about reference frames.
 #[derive(Copy, Clone, Debug)]
 pub struct ReferenceFrameInfo {
     /// The transformation that establishes this reference frame, relative to the parent
     /// reference frame. The origin of the reference frame is included in the transformation.
-    pub transform: LayerToScrollTransform,
+    pub resolved_transform: LayerToScrollTransform,
+
+    /// The source transform and perspective matrices provided by the stacking context
+    /// that forms this reference frame. We maintain the property binding information
+    /// here so that we can resolve the animated transform and update the tree each
+    /// frame.
+    pub source_transform: PropertyBinding<LayoutTransform>,
+    pub source_perspective: LayoutTransform,
 
     /// The original, not including the transform and relative to the parent reference frame,
     /// origin of this reference frame. This is already rolled into the `transform' property, but
     /// we also store it here to properly transform the viewport for sticky positioning.
     pub origin_in_parent_reference_frame: LayerVector2D,
 }
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -1,23 +1,24 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect, LayerToScrollTransform};
+use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect};
 use api::{LayerToWorldTransform, LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase};
-use api::{ScrollLayerState, ScrollLocation, WorldPoint};
+use api::{PropertyBinding, LayoutTransform, ScrollLayerState, ScrollLocation, WorldPoint};
 use clip::ClipStore;
 use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState, StickyFrameInfo};
 use gpu_cache::GpuCache;
 use gpu_types::ClipScrollNodeData;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use render_task::ClipChain;
 use resource_cache::ResourceCache;
+use scene::SceneProperties;
 
 pub type ScrollStates = FastHashMap<ClipId, ScrollingState>;
 
 /// An id that identifies coordinate systems in the ClipScrollTree. Each
 /// coordinate system has an id and those ids will be shared when the coordinates
 /// system are the same or are in the same axis-aligned space. This allows
 /// for optimizing mask generation.
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -188,19 +189,18 @@ impl ClipScrollTree {
             }
         };
 
         if !node.local_clip_rect.contains(&transformed_point) {
             cache.insert(*node_id, None);
             return false;
         }
 
-        let point_in_clips = transformed_point - node.local_clip_rect.origin.to_vector();
         for &(ref clip, _) in clip_store.get(&clip_sources_handle).clips() {
-            if !clip.contains(&point_in_clips) {
+            if !clip.contains(&transformed_point) {
                 cache.insert(*node_id, None);
                 return false;
             }
         }
 
         cache.insert(*node_id, Some(point_in_layer));
 
         true
@@ -329,16 +329,17 @@ impl ClipScrollTree {
         &mut self,
         screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pan: LayerPoint,
         node_data: &mut Vec<ClipScrollNodeData>,
+        scene_properties: &SceneProperties,
     ) {
         if self.nodes.is_empty() {
             return;
         }
 
         let root_reference_frame_id = self.root_reference_frame_id();
         let root_viewport = self.nodes[&root_reference_frame_id].local_clip_rect;
 
@@ -360,28 +361,30 @@ impl ClipScrollTree {
         self.update_node(
             root_reference_frame_id,
             &mut state,
             device_pixel_ratio,
             clip_store,
             resource_cache,
             gpu_cache,
             node_data,
+            scene_properties,
         );
     }
 
     fn update_node(
         &mut self,
         layer_id: ClipId,
         state: &mut TransformUpdateState,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         node_data: &mut Vec<ClipScrollNodeData>,
+        scene_properties: &SceneProperties,
     ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
         //           Restructure this to avoid the clones!
         let mut state = state.clone();
         let node_children = {
             let node = match self.nodes.get_mut(&layer_id) {
                 Some(node) => node,
                 None => return,
@@ -389,30 +392,32 @@ impl ClipScrollTree {
 
             node.update(
                 &mut state,
                 node_data,
                 device_pixel_ratio,
                 clip_store,
                 resource_cache,
                 gpu_cache,
+                scene_properties,
             );
 
             node.children.clone()
         };
 
         for child_layer_id in node_children {
             self.update_node(
                 child_layer_id,
                 &mut state,
                 device_pixel_ratio,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 node_data,
+                scene_properties,
             );
         }
     }
 
     pub fn tick_scrolling_bounce_animations(&mut self) {
         for (_, node) in &mut self.nodes {
             node.tick_scrolling_bounce_animation()
         }
@@ -436,32 +441,34 @@ impl ClipScrollTree {
         let new_id = ClipId::DynamicallyAddedNode(self.current_new_node_item, pipeline_id);
         self.current_new_node_item += 1;
         new_id
     }
 
     pub fn add_reference_frame(
         &mut self,
         rect: &LayerRect,
-        transform: &LayerToScrollTransform,
+        source_transform: Option<PropertyBinding<LayoutTransform>>,
+        source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayerVector2D,
         pipeline_id: PipelineId,
         parent_id: Option<ClipId>,
         root_for_pipeline: bool,
     ) -> ClipId {
         let reference_frame_id = if root_for_pipeline {
             ClipId::root_reference_frame(pipeline_id)
         } else {
             self.generate_new_clip_id(pipeline_id)
         };
 
         let node = ClipScrollNode::new_reference_frame(
             parent_id,
             rect,
-            transform,
+            source_transform,
+            source_perspective,
             origin_in_parent_reference_frame,
             pipeline_id,
         );
         self.add_node(node, reference_frame_id);
         reference_frame_id
     }
 
     pub fn add_sticky_frame(
@@ -511,17 +518,17 @@ impl ClipScrollTree {
                 let clips = clip_store.get(&clip_sources_handle).clips();
                 pt.new_level(format!("Clip Sources [{}]", clips.len()));
                 for source in clips {
                     pt.add_item(format!("{:?}", source));
                 }
                 pt.end_level();
             }
             NodeType::ReferenceFrame(ref info) => {
-                pt.new_level(format!("ReferenceFrame {:?}", info.transform));
+                pt.new_level(format!("ReferenceFrame {:?}", info.resolved_transform));
                 pt.add_item(format!("id: {:?}", id));
             }
             NodeType::ScrollFrame(scrolling_info) => {
                 pt.new_level(format!("ScrollFrame"));
                 pt.add_item(format!("id: {:?}", id));
                 pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
                 pt.add_item(format!("scroll.offset: {:?}", scrolling_info.offset));
             }
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorU, DeviceIntRect, DeviceUintSize, ImageFormat};
 use debug_font_data;
-use device::{Device, GpuMarker, Program, Texture, TextureSlot, VertexDescriptor, VAO};
+use device::{Device, Program, Texture, TextureSlot, VertexDescriptor, VAO};
 use device::{TextureFilter, TextureTarget, VertexAttribute, VertexAttributeKind, VertexUsageHint};
 use euclid::{Point2D, Rect, Size2D, Transform3D};
 use internal_types::{ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::RenderTargetMode;
 use std::f32;
 
 #[derive(Debug, Copy, Clone)]
 enum DebugSampler {
@@ -100,17 +100,17 @@ pub struct DebugRenderer {
     tri_indices: Vec<u32>,
     tri_vao: VAO,
     line_vertices: Vec<DebugColorVertex>,
     line_vao: VAO,
     color_program: Program,
 }
 
 impl DebugRenderer {
-    pub fn new(device: &mut Device) -> DebugRenderer {
+    pub fn new(device: &mut Device) -> Self {
         let font_program = device.create_program("debug_font", "", &DESC_FONT).unwrap();
         device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]);
 
         let color_program = device
             .create_program("debug_color", "", &DESC_COLOR)
             .unwrap();
 
         let font_vao = device.create_vao(&DESC_FONT);
@@ -258,17 +258,16 @@ impl DebugRenderer {
         let p1 = p0 + rect.size;
         self.add_line(p0.x, p0.y, color, p1.x, p0.y, color);
         self.add_line(p1.x, p0.y, color, p1.x, p1.y, color);
         self.add_line(p1.x, p1.y, color, p0.x, p1.y, color);
         self.add_line(p0.x, p1.y, color, p0.x, p0.y, color);
     }
 
     pub fn render(&mut self, device: &mut Device, viewport_size: &DeviceUintSize) {
-        let _gm = GpuMarker::new(device.rc_gl(), "debug");
         device.disable_depth();
         device.set_blend(true);
         device.set_blend_mode_premultiplied_alpha();
 
         let projection = Transform3D::ortho(
             0.0,
             viewport_size.width as f32,
             viewport_size.height as f32,
--- a/gfx/webrender/src/debug_server.rs
+++ b/gfx/webrender/src/debug_server.rs
@@ -48,16 +48,20 @@ impl ws::Handler for Server {
                     "enable_profiler" => DebugCommand::EnableProfiler(true),
                     "disable_profiler" => DebugCommand::EnableProfiler(false),
                     "enable_texture_cache_debug" => DebugCommand::EnableTextureCacheDebug(true),
                     "disable_texture_cache_debug" => DebugCommand::EnableTextureCacheDebug(false),
                     "enable_render_target_debug" => DebugCommand::EnableRenderTargetDebug(true),
                     "disable_render_target_debug" => DebugCommand::EnableRenderTargetDebug(false),
                     "enable_alpha_rects_debug" => DebugCommand::EnableAlphaRectsDebug(true),
                     "disable_alpha_rects_debug" => DebugCommand::EnableAlphaRectsDebug(false),
+                    "enable_gpu_time_queries" => DebugCommand::EnableGpuTimeQueries(true),
+                    "disable_gpu_time_queries" => DebugCommand::EnableGpuTimeQueries(false),
+                    "enable_gpu_sample_queries" => DebugCommand::EnableGpuSampleQueries(true),
+                    "disable_gpu_sample_queries" => DebugCommand::EnableGpuSampleQueries(false),
                     "fetch_passes" => DebugCommand::FetchPasses,
                     "fetch_documents" => DebugCommand::FetchDocuments,
                     "fetch_clipscrolltree" => DebugCommand::FetchClipScrollTree,
                     msg => {
                         println!("unknown msg {}", msg);
                         return Ok(());
                     }
                 };
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -480,318 +480,16 @@ pub struct FBOId(gl::GLuint);
 pub struct RBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 pub struct VBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 struct IBOId(gl::GLuint);
 
-#[cfg(feature = "query")]
-const MAX_PROFILE_FRAMES: usize = 4;
-
-pub trait NamedTag {
-    fn get_label(&self) -> &str;
-}
-
-#[derive(Debug, Clone)]
-pub struct GpuTimer<T> {
-    pub tag: T,
-    pub time_ns: u64,
-}
-
-#[derive(Debug, Clone)]
-pub struct GpuSampler<T> {
-    pub tag: T,
-    pub count: u64,
-}
-
-#[cfg(feature = "query")]
-pub struct QuerySet<T> {
-    set: Vec<gl::GLuint>,
-    data: Vec<T>,
-    pending: gl::GLuint,
-}
-
-#[cfg(feature = "query")]
-impl<T> QuerySet<T> {
-    fn new(set: Vec<gl::GLuint>) -> Self {
-        QuerySet {
-            set,
-            data: Vec::new(),
-            pending: 0,
-        }
-    }
-
-    fn reset(&mut self) {
-        self.data.clear();
-        self.pending = 0;
-    }
-
-    fn add(&mut self, value: T) -> Option<gl::GLuint> {
-        assert_eq!(self.pending, 0);
-        self.set.get(self.data.len()).cloned().map(|query_id| {
-            self.data.push(value);
-            self.pending = query_id;
-            query_id
-        })
-    }
-
-    fn take<F: Fn(&mut T, gl::GLuint)>(&mut self, fun: F) -> Vec<T> {
-        let mut data = mem::replace(&mut self.data, Vec::new());
-        for (value, &query) in data.iter_mut().zip(self.set.iter()) {
-            fun(value, query)
-        }
-        data
-    }
-}
-
-#[cfg(feature = "query")]
-pub struct GpuFrameProfile<T> {
-    gl: Rc<gl::Gl>,
-    timers: QuerySet<GpuTimer<T>>,
-    samplers: QuerySet<GpuSampler<T>>,
-    frame_id: FrameId,
-    inside_frame: bool,
-}
-
-#[cfg(feature = "query")]
-impl<T> GpuFrameProfile<T> {
-    const MAX_TIMERS_PER_FRAME: usize = 256;
-    // disable samplers on OSX due to driver bugs
-    #[cfg(target_os = "macos")]
-    const MAX_SAMPLERS_PER_FRAME: usize = 0;
-    #[cfg(not(target_os = "macos"))]
-    const MAX_SAMPLERS_PER_FRAME: usize = 16;
-
-    fn new(gl: Rc<gl::Gl>) -> Self {
-        assert_eq!(gl.get_type(), gl::GlType::Gl);
-        let time_queries = gl.gen_queries(Self::MAX_TIMERS_PER_FRAME as _);
-        let sample_queries = gl.gen_queries(Self::MAX_SAMPLERS_PER_FRAME as _);
-
-        GpuFrameProfile {
-            gl,
-            timers: QuerySet::new(time_queries),
-            samplers: QuerySet::new(sample_queries),
-            frame_id: FrameId(0),
-            inside_frame: false,
-        }
-    }
-
-    fn begin_frame(&mut self, frame_id: FrameId) {
-        self.frame_id = frame_id;
-        self.timers.reset();
-        self.samplers.reset();
-        self.inside_frame = true;
-    }
-
-    fn end_frame(&mut self) {
-        self.done_marker();
-        self.done_sampler();
-        self.inside_frame = false;
-    }
-
-    fn done_marker(&mut self) {
-        debug_assert!(self.inside_frame);
-        if self.timers.pending != 0 {
-            self.gl.end_query(gl::TIME_ELAPSED);
-            self.timers.pending = 0;
-        }
-    }
-
-    fn add_marker(&mut self, tag: T) -> GpuMarker
-    where
-        T: NamedTag,
-    {
-        self.done_marker();
-
-        let marker = GpuMarker::new(&self.gl, tag.get_label());
-
-        if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
-            self.gl.begin_query(gl::TIME_ELAPSED, query);
-        }
-
-        marker
-    }
-
-    fn done_sampler(&mut self) {
-        debug_assert!(self.inside_frame);
-        if self.samplers.pending != 0 {
-            self.gl.end_query(gl::SAMPLES_PASSED);
-            self.samplers.pending = 0;
-        }
-    }
-
-    fn add_sampler(&mut self, tag: T)
-    where
-        T: NamedTag,
-    {
-        self.done_sampler();
-
-        if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
-            self.gl.begin_query(gl::SAMPLES_PASSED, query);
-        }
-    }
-
-    fn is_valid(&self) -> bool {
-        !self.timers.set.is_empty() || !self.samplers.set.is_empty()
-    }
-
-    fn build_samples(&mut self) -> (Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
-        debug_assert!(!self.inside_frame);
-        let gl = &self.gl;
-
-        (
-            self.timers.take(|timer, query| {
-                timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
-            }),
-            self.samplers.take(|sampler, query| {
-                sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
-            }),
-        )
-    }
-}
-
-#[cfg(feature = "query")]
-impl<T> Drop for GpuFrameProfile<T> {
-    fn drop(&mut self) {
-        if !self.timers.set.is_empty() {
-            self.gl.delete_queries(&self.timers.set);
-        }
-        if !self.samplers.set.is_empty() {
-            self.gl.delete_queries(&self.samplers.set);
-        }
-    }
-}
-
-#[cfg(feature = "query")]
-pub struct GpuProfiler<T> {
-    frames: [GpuFrameProfile<T>; MAX_PROFILE_FRAMES],
-    next_frame: usize,
-}
-
-#[cfg(feature = "query")]
-impl<T> GpuProfiler<T> {
-    pub fn new(gl: &Rc<gl::Gl>) -> Self {
-        GpuProfiler {
-            next_frame: 0,
-            frames: [
-                GpuFrameProfile::new(Rc::clone(gl)),
-                GpuFrameProfile::new(Rc::clone(gl)),
-                GpuFrameProfile::new(Rc::clone(gl)),
-                GpuFrameProfile::new(Rc::clone(gl)),
-            ],
-        }
-    }
-
-    pub fn build_samples(&mut self) -> Option<(FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>)> {
-        let frame = &mut self.frames[self.next_frame];
-        if frame.is_valid() {
-            let (timers, samplers) = frame.build_samples();
-            Some((frame.frame_id, timers, samplers))
-        } else {
-            None
-        }
-    }
-
-    pub fn begin_frame(&mut self, frame_id: FrameId) {
-        let frame = &mut self.frames[self.next_frame];
-        frame.begin_frame(frame_id);
-    }
-
-    pub fn end_frame(&mut self) {
-        let frame = &mut self.frames[self.next_frame];
-        frame.end_frame();
-        self.next_frame = (self.next_frame + 1) % MAX_PROFILE_FRAMES;
-    }
-
-    pub fn add_marker(&mut self, tag: T) -> GpuMarker
-    where
-        T: NamedTag,
-    {
-        self.frames[self.next_frame].add_marker(tag)
-    }
-
-    pub fn add_sampler(&mut self, tag: T)
-    where
-        T: NamedTag,
-    {
-        self.frames[self.next_frame].add_sampler(tag)
-    }
-
-    pub fn done_sampler(&mut self) {
-        self.frames[self.next_frame].done_sampler()
-    }
-}
-
-#[cfg(not(feature = "query"))]
-pub struct GpuProfiler<T>(Option<T>);
-
-#[cfg(not(feature = "query"))]
-impl<T> GpuProfiler<T> {
-    pub fn new(_: &Rc<gl::Gl>) -> Self {
-        GpuProfiler(None)
-    }
-
-    pub fn build_samples(&mut self) -> Option<(FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>)> {
-        None
-    }
-
-    pub fn begin_frame(&mut self, _: FrameId) {}
-
-    pub fn end_frame(&mut self) {}
-
-    pub fn add_marker(&mut self, _: T) -> GpuMarker {
-        GpuMarker {}
-    }
-
-    pub fn add_sampler(&mut self, _: T) {}
-
-    pub fn done_sampler(&mut self) {}
-}
-
-
-#[must_use]
-pub struct GpuMarker {
-    #[cfg(feature = "query")]
-    gl: Rc<gl::Gl>,
-}
-
-#[cfg(feature = "query")]
-impl GpuMarker {
-    pub fn new(gl: &Rc<gl::Gl>, message: &str) -> Self {
-        debug_assert_eq!(gl.get_type(), gl::GlType::Gl);
-        gl.push_group_marker_ext(message);
-        GpuMarker { gl: Rc::clone(gl) }
-    }
-
-    pub fn fire(gl: &gl::Gl, message: &str) {
-        debug_assert_eq!(gl.get_type(), gl::GlType::Gl);
-        gl.insert_event_marker_ext(message);
-    }
-}
-
-#[cfg(feature = "query")]
-impl Drop for GpuMarker {
-    fn drop(&mut self) {
-        self.gl.pop_group_marker_ext();
-    }
-}
-
-#[cfg(not(feature = "query"))]
-impl GpuMarker {
-    #[inline]
-    pub fn new(_: &Rc<gl::Gl>, _: &str) -> Self {
-        GpuMarker{}
-    }
-    #[inline]
-    pub fn fire(_: &gl::Gl, _: &str) {}
-}
-
-
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
     Static,
     Dynamic,
     Stream,
 }
 
 impl VertexUsageHint {
@@ -1133,25 +831,24 @@ impl Device {
         texture.height = height;
         texture.filter = filter;
         texture.layer_count = layer_count;
         texture.mode = mode;
 
         let (internal_format, gl_format) = gl_texture_formats_for_image_format(self.gl(), format);
         let type_ = gl_type_for_texture_format(format);
 
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+        self.set_texture_parameters(texture.target, filter);
+
         match mode {
             RenderTargetMode::RenderTarget => {
-                self.bind_texture(DEFAULT_TEXTURE, texture);
-                self.set_texture_parameters(texture.target, filter);
-                self.update_texture_storage(texture, layer_count, resized);
+                self.update_texture_storage(texture, resized);
             }
             RenderTargetMode::None => {
-                self.bind_texture(DEFAULT_TEXTURE, texture);
-                self.set_texture_parameters(texture.target, filter);
                 let expanded_data: Vec<u8>;
                 let actual_pixels = if pixels.is_some() && format == ImageFormat::A8 &&
                     cfg!(any(target_arch = "arm", target_arch = "aarch64"))
                 {
                     expanded_data = pixels
                         .unwrap()
                         .iter()
                         .flat_map(|&byte| repeat(byte).take(4))
@@ -1192,72 +889,75 @@ impl Device {
                     _ => panic!("BUG: Unexpected texture target!"),
                 }
             }
         }
     }
 
     /// Updates the texture storage for the texture, creating
     /// FBOs as required.
-    fn update_texture_storage(&mut self, texture: &mut Texture, layer_count: i32, resized: bool) {
-        assert!(layer_count > 0);
+    fn update_texture_storage(&mut self, texture: &mut Texture, resized: bool) {
+        assert!(texture.layer_count > 0);
         assert_eq!(texture.target, gl::TEXTURE_2D_ARRAY);
 
-        let current_layer_count = texture.fbo_ids.len() as i32;
+        let needed_layer_count = texture.layer_count - texture.fbo_ids.len() as i32;
         // If the texture is already the required size skip.
-        if current_layer_count == layer_count && !resized {
+        if needed_layer_count == 0 && !resized {
             return;
         }
 
         let (internal_format, gl_format) =
             gl_texture_formats_for_image_format(&*self.gl, texture.format);
         let type_ = gl_type_for_texture_format(texture.format);
 
         self.gl.tex_image_3d(
             texture.target,
             0,
             internal_format as gl::GLint,
             texture.width as gl::GLint,
             texture.height as gl::GLint,
-            layer_count,
+            texture.layer_count,
             0,
             gl_format,
             type_,
             None,
         );
 
-        let needed_layer_count = layer_count - current_layer_count;
         if needed_layer_count > 0 {
             // Create more framebuffers to fill the gap
             let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
             texture
                 .fbo_ids
-                .extend(new_fbos.into_iter().map(|id| FBOId(id)));
+                .extend(new_fbos.into_iter().map(FBOId));
         } else if needed_layer_count < 0 {
             // Remove extra framebuffers
-            for old in texture.fbo_ids.drain(layer_count as usize ..) {
+            for old in texture.fbo_ids.drain(texture.layer_count as usize ..) {
                 self.gl.delete_framebuffers(&[old.0]);
             }
         }
 
-        let depth_rb = if let Some(rbo) = texture.depth_rb {
-            rbo.0
-        } else {
-            let renderbuffer_ids = self.gl.gen_renderbuffers(1);
-            let depth_rb = renderbuffer_ids[0];
-            texture.depth_rb = Some(RBOId(depth_rb));
-            depth_rb
+        let (depth_rb, depth_alloc) = match texture.depth_rb {
+            Some(rbo) => (rbo.0, resized),
+            None => {
+                let renderbuffer_ids = self.gl.gen_renderbuffers(1);
+                let depth_rb = renderbuffer_ids[0];
+                texture.depth_rb = Some(RBOId(depth_rb));
+                (depth_rb, true)
+            }
         };
-        self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
-        self.gl.renderbuffer_storage(
-            gl::RENDERBUFFER,
-            gl::DEPTH_COMPONENT24,
-            texture.width as gl::GLsizei,
-            texture.height as gl::GLsizei,
-        );
+
+        if depth_alloc {
+            self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
+            self.gl.renderbuffer_storage(
+                gl::RENDERBUFFER,
+                gl::DEPTH_COMPONENT24,
+                texture.width as gl::GLsizei,
+                texture.height as gl::GLsizei,
+            );
+        }
 
         for (fbo_index, fbo_id) in texture.fbo_ids.iter().enumerate() {
             self.gl.bind_framebuffer(gl::FRAMEBUFFER, fbo_id.0);
             self.gl.framebuffer_texture_layer(
                 gl::FRAMEBUFFER,
                 gl::COLOR_ATTACHMENT0,
                 texture.id,
                 0,
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,32 +1,32 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
 use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp};
 use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
-use api::{LayerSize, LayerToScrollTransform, LayerVector2D};
-use api::{LayoutRect, LayoutSize, LayoutTransform};
+use api::{LayerSize, LayerVector2D};
+use api::{LayoutRect, LayoutSize};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
 use api::{ClipMode, TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
 use clip_scroll_node::StickyFrameInfo;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig, ScrollbarInfo};
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, RendererFrame};
 use prim_store::RectangleContent;
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
 use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
-use scene::{Scene, StackingContextHelpers, ScenePipeline};
+use scene::{Scene, StackingContextHelpers, ScenePipeline, SceneProperties};
 use tiling::{CompositeOps, Frame};
 use util::ComplexClipRegionHelpers;
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF {
     r: 0.3,
@@ -43,16 +43,17 @@ struct FlattenContext<'a> {
     tiled_image_map: TiledImageMap,
     pipeline_epochs: Vec<(PipelineId, Epoch)>,
     replacements: Vec<(ClipId, ClipId)>,
     /// Opaque rectangle vector, stored here in order to
     /// avoid re-allocation on each use.
     opaque_parts: Vec<LayoutRect>,
     /// Same for the transparent rectangles.
     transparent_parts: Vec<LayoutRect>,
+    output_pipelines: &'a FastHashSet<PipelineId>,
 }
 
 impl<'a> FlattenContext<'a> {
     /// Since WebRender still handles fixed position and reference frame content internally
     /// we need to apply this table of id replacements only to the id that affects the
     /// position of a node. We can eventually remove this when clients start handling
     /// reference frames themselves. This method applies these replacements.
     fn apply_scroll_frame_id_replacement(&self, id: ClipId) -> ClipId {
@@ -83,31 +84,28 @@ impl<'a> FlattenContext<'a> {
     fn flatten_root(
         &mut self,
         traversal: &mut BuiltDisplayListIter<'a>,
         pipeline_id: PipelineId,
         frame_size: &LayoutSize,
         root_reference_frame_id: ClipId,
         root_scroll_frame_id: ClipId,
     ) {
+        let clip_id = ClipId::root_scroll_node(pipeline_id);
+
         self.builder.push_stacking_context(
-            &LayerVector2D::zero(),
             pipeline_id,
             CompositeOps::default(),
             TransformStyle::Flat,
             true,
             true,
+            ClipAndScrollInfo::simple(clip_id),
+            self.output_pipelines,
         );
 
-        // We do this here, rather than above because we want any of the top-level
-        // stacking contexts in the display list to be treated like root stacking contexts.
-        // FIXME(mrobinson): Currently only the first one will, which for the moment is
-        // sufficient for all our use cases.
-        self.builder.notify_waiting_for_root_stacking_context();
-
         // For the root pipeline, there's no need to add a full screen rectangle
         // here, as it's handled by the framebuffer clear.
         if self.scene.root_pipeline_id != Some(pipeline_id) {
             if let Some(pipeline) = self.scene.pipelines.get(&pipeline_id) {
                 if let Some(bg_color) = pipeline.background_color {
                     let root_bounds = LayerRect::new(LayerPoint::zero(), *frame_size);
                     let info = LayerPrimitiveInfo::new(root_bounds);
                     self.builder.add_solid_rectangle(
@@ -116,17 +114,21 @@ impl<'a> FlattenContext<'a> {
                         RectangleContent::Fill(bg_color),
                         None,
                     );
                 }
             }
         }
 
 
-        self.flatten_items(traversal, pipeline_id, LayerVector2D::zero());
+        self.flatten_items(
+            traversal,
+            pipeline_id,
+            LayerVector2D::zero(),
+        );
 
         if self.builder.config.enable_scrollbars {
             let scrollbar_rect = LayerRect::new(LayerPoint::zero(), LayerSize::new(10.0, 70.0));
             let container_rect = LayerRect::new(LayerPoint::zero(), *frame_size);
             self.builder.add_solid_rectangle(
                 ClipAndScrollInfo::simple(root_reference_frame_id),
                 &LayerPrimitiveInfo::new(scrollbar_rect),
                 RectangleContent::Fill(DEFAULT_SCROLLBAR_COLOR),
@@ -149,17 +151,21 @@ impl<'a> FlattenContext<'a> {
                     Some(item) => item,
                     None => break,
                 };
 
                 if SpecificDisplayItem::PopStackingContext == *item.item() {
                     return;
                 }
 
-                self.flatten_item(item, pipeline_id, reference_frame_relative_offset)
+                self.flatten_item(
+                    item,
+                    pipeline_id,
+                    reference_frame_relative_offset,
+                )
             };
 
             // If flatten_item created a sub-traversal, we need `traversal` to have the
             // same state as the completed subtraversal, so we reinitialize it here.
             if let Some(subtraversal) = subtraversal {
                 *traversal = subtraversal;
             }
         }
@@ -235,17 +241,16 @@ impl<'a> FlattenContext<'a> {
                 .pipelines
                 .get(&pipeline_id)
                 .expect("No display list?!")
                 .display_list;
             CompositeOps::new(
                 stacking_context.filter_ops_for_compositing(
                     display_list,
                     filters,
-                    &self.scene.properties,
                 ),
                 stacking_context.mix_blend_mode_for_compositing(),
             )
         };
 
         if stacking_context.scroll_policy == ScrollPolicy::Fixed {
             self.replacements.push((
                 context_scroll_node_id,
@@ -253,53 +258,48 @@ impl<'a> FlattenContext<'a> {
             ));
         }
 
         // If we have a transformation, we establish a new reference frame. This means
         // that fixed position stacking contexts are positioned relative to us.
         let is_reference_frame =
             stacking_context.transform.is_some() || stacking_context.perspective.is_some();
         if is_reference_frame {
-            let transform = stacking_context.transform.as_ref();
-            let transform = self.scene.properties.resolve_layout_transform(transform);
-            let perspective = stacking_context
-                .perspective
-                .unwrap_or_else(LayoutTransform::identity);
             let origin = reference_frame_relative_offset + bounds.origin.to_vector();
-            let transform = LayerToScrollTransform::create_translation(origin.x, origin.y, 0.0)
-                .pre_mul(&transform)
-                .pre_mul(&perspective);
-
             let reference_frame_bounds = LayerRect::new(LayerPoint::zero(), bounds.size);
             let mut clip_id = self.apply_scroll_frame_id_replacement(context_scroll_node_id);
             clip_id = self.builder.push_reference_frame(
                 Some(clip_id),
                 pipeline_id,
                 &reference_frame_bounds,
-                &transform,
+                stacking_context.transform,
+                stacking_context.perspective,
                 origin,
                 false,
                 self.clip_scroll_tree,
             );
             self.replacements.push((context_scroll_node_id, clip_id));
             reference_frame_relative_offset = LayerVector2D::zero();
         } else {
             reference_frame_relative_offset = LayerVector2D::new(
                 reference_frame_relative_offset.x + bounds.origin.x,
                 reference_frame_relative_offset.y + bounds.origin.y,
             );
-        }
+        };
+
+        let sc_scroll_node_id = self.apply_scroll_frame_id_replacement(context_scroll_node_id);
 
         self.builder.push_stacking_context(
-            &reference_frame_relative_offset,
             pipeline_id,
             composition_operations,
             stacking_context.transform_style,
             is_backface_visible,
             false,
+            ClipAndScrollInfo::simple(sc_scroll_node_id),
+            self.output_pipelines,
         );
 
         self.flatten_items(
             traversal,
             pipeline_id,
             reference_frame_relative_offset,
         );
 
@@ -342,22 +342,22 @@ impl<'a> FlattenContext<'a> {
             clip_region,
             self.clip_scroll_tree,
         );
 
         self.pipeline_epochs.push((pipeline_id, pipeline.epoch));
 
         let iframe_rect = LayerRect::new(LayerPoint::zero(), bounds.size);
         let origin = reference_frame_relative_offset + bounds.origin.to_vector();
-        let transform = LayerToScrollTransform::create_translation(origin.x, origin.y, 0.0);
         let iframe_reference_frame_id = self.builder.push_reference_frame(
             Some(clip_id),
             pipeline_id,
             &iframe_rect,
-            &transform,
+            None,
+            None,
             origin,
             true,
             self.clip_scroll_tree,
         );
 
         self.builder.add_scroll_frame(
             ClipId::root_scroll_node(pipeline_id),
             iframe_reference_frame_id,
@@ -513,16 +513,17 @@ impl<'a> FlattenContext<'a> {
             }
             SpecificDisplayItem::BoxShadow(ref box_shadow_info) => {
                 let bounds = box_shadow_info
                     .box_bounds
                     .translate(&reference_frame_relative_offset);
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = bounds;
                 self.builder.add_box_shadow(
+                    pipeline_id,
                     clip_and_scroll,
                     &prim_info,
                     &box_shadow_info.offset,
                     &box_shadow_info.color,
                     box_shadow_info.blur_radius,
                     box_shadow_info.spread_radius,
                     box_shadow_info.border_radius,
                     box_shadow_info.clip_mode,
@@ -1083,16 +1084,17 @@ impl FrameContext {
     pub fn create(
         &mut self,
         old_builder: Option<FrameBuilder>,
         scene: &Scene,
         resource_cache: &mut ResourceCache,
         window_size: DeviceUintSize,
         inner_rect: DeviceUintRect,
         device_pixel_ratio: f32,
+        output_pipelines: &FastHashSet<PipelineId>,
     ) -> Option<FrameBuilder> {
         let root_pipeline_id = match scene.root_pipeline_id {
             Some(root_pipeline_id) => root_pipeline_id,
             None => return old_builder,
         };
 
         let root_pipeline = match scene.pipelines.get(&root_pipeline_id) {
             Some(root_pipeline) => root_pipeline,
@@ -1123,16 +1125,17 @@ impl FrameContext {
                 ),
                 clip_scroll_tree: &mut self.clip_scroll_tree,
                 font_instances: resource_cache.get_font_instances(),
                 tiled_image_map: resource_cache.get_tiled_image_map(),
                 pipeline_epochs: Vec::new(),
                 replacements: Vec::new(),
                 opaque_parts: Vec::new(),
                 transparent_parts: Vec::new(),
+                output_pipelines,
             };
 
             roller.builder.push_root(
                 root_pipeline_id,
                 &root_pipeline.viewport_size,
                 &root_pipeline.content_size,
                 roller.clip_scroll_tree,
             );
@@ -1149,16 +1152,18 @@ impl FrameContext {
             roller.flatten_root(
                 &mut root_pipeline.display_list.iter(),
                 root_pipeline_id,
                 &root_pipeline.viewport_size,
                 reference_frame_id,
                 scroll_frame_id,
             );
 
+            debug_assert!(roller.builder.picture_stack.is_empty());
+
             self.pipeline_epoch_map.extend(roller.pipeline_epochs.drain(..));
             roller.builder
         };
 
         self.clip_scroll_tree
             .finalize_and_apply_pending_scroll_offsets(old_scrolling_states);
         Some(frame_builder)
     }
@@ -1175,31 +1180,31 @@ impl FrameContext {
     pub fn build_renderer_frame(
         &mut self,
         frame_builder: &mut FrameBuilder,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_ratio: f32,
         pan: LayerPoint,
-        output_pipelines: &FastHashSet<PipelineId>,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
+        scene_properties: &SceneProperties,
     ) -> RendererFrame {
         let frame = frame_builder.build(
             resource_cache,
             gpu_cache,
             self.id,
             &mut self.clip_scroll_tree,
             pipelines,
             device_pixel_ratio,
             pan,
-            output_pipelines,
             texture_cache_profile,
             gpu_cache_profile,
+            scene_properties,
         );
 
         self.get_renderer_frame_impl(Some(frame))
     }
 
     pub fn get_renderer_frame(&self) -> RendererFrame {
         self.get_renderer_frame_impl(None)
     }
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,87 +1,76 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderDetails, BorderDisplayItem, BuiltDisplayList};
-use api::{ClipAndScrollInfo, ClipId, ColorF, PremultipliedColorF};
+use api::{ClipAndScrollInfo, ClipId, ColorF, PropertyBinding};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
-use api::{ExtendMode, FilterOp, FontRenderMode};
+use api::{ExtendMode, FontRenderMode, LayoutTransform};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
-use api::{LayerPixel, LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
+use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
-use api::{WorldPixel, WorldPoint, YuvColorSpace, YuvData, device_length};
+use api::{WorldPoint, YuvColorSpace, YuvData};
 use app_units::Au;
 use border::ImageBorderSegment;
-use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
+use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains, MAX_CLIP};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
-use euclid::{SideOffsets2D, TypedTransform3D, vec2, vec3};
+use euclid::{SideOffsets2D, vec2};
 use frame::FrameId;
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCache;
-use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
-use picture::{PictureKind, PicturePrimitive};
-use plane_split::{BspSplitter, Polygon, Splitter};
+use internal_types::{FastHashMap, FastHashSet};
+use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
 use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
-use prim_store::{PrimitiveContainer, PrimitiveIndex, PrimitiveRun};
+use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
-use render_task::{AlphaRenderItem, ClearMode, RenderTask, RenderTaskId, RenderTaskLocation};
+use render_task::{RenderTask, RenderTaskLocation};
 use render_task::RenderTaskTree;
 use resource_cache::ResourceCache;
-use scene::ScenePipeline;
+use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize, f32, i32};
-use tiling::{CompositeOps, ContextIsolation, Frame, PrimitiveRunCmd, RenderPass};
-use tiling::{RenderTargetContext, RenderTargetKind, ScrollbarPrimitive, StackingContext};
-use tiling::StackingContextIndex;
+use tiling::{CompositeOps, Frame};
+use tiling::{RenderPass};
+use tiling::{RenderTargetContext, ScrollbarPrimitive};
 use util::{self, pack_as_float, RectHelpers, recycle_vec};
-use box_shadow::BLUR_SAMPLE_SCALE;
 
 #[derive(Debug)]
 pub struct ScrollbarInfo(pub ClipId, pub LayerRect);
 
-/// Construct a polygon from stacking context boundaries.
-/// `anchor` here is an index that's going to be preserved in all the
-/// splits of the polygon.
-fn make_polygon(
-    stacking_context: &StackingContext,
-    node: &ClipScrollNode,
-    anchor: usize,
-) -> Polygon<f64, WorldPixel> {
-    //TODO: only work with `isolated_items_bounds.size` worth of space
-    // This can be achieved by moving the `origin` shift
-    // from the primitive local coordinates into the layer transformation.
-    // Which in turn needs it to be a render task property obeyed by all primitives
-    // upon rendering, possibly not limited to `write_*_vertex` implementations.
-    let size = stacking_context.isolated_items_bounds.bottom_right();
-    let bounds = LayerRect::new(LayerPoint::zero(), LayerSize::new(size.x, size.y));
-    let mat = TypedTransform3D::row_major(
-        node.world_content_transform.m11 as f64,
-        node.world_content_transform.m12 as f64,
-        node.world_content_transform.m13 as f64,
-        node.world_content_transform.m14 as f64,
-        node.world_content_transform.m21 as f64,
-        node.world_content_transform.m22 as f64,
-        node.world_content_transform.m23 as f64,
-        node.world_content_transform.m24 as f64,
-        node.world_content_transform.m31 as f64,
-        node.world_content_transform.m32 as f64,
-        node.world_content_transform.m33 as f64,
-        node.world_content_transform.m34 as f64,
-        node.world_content_transform.m41 as f64,
-        node.world_content_transform.m42 as f64,
-        node.world_content_transform.m43 as f64,
-        node.world_content_transform.m44 as f64);
-    Polygon::from_transformed_rect(bounds.cast().unwrap(), mat, anchor)
+/// Properties of a stacking context that are maintained
+/// during creation of the scene. These structures are
+/// not persisted after the initial scene build.
+struct StackingContext {
+    /// Pipeline this stacking context belongs to.
+    pipeline_id: PipelineId,
+
+    /// Filters / mix-blend-mode effects
+    composite_ops: CompositeOps,
+
+    /// If true, visible when backface is visible.
+    is_backface_visible: bool,
+
+    /// Allow subpixel AA for text runs on this stacking context.
+    /// This is a temporary hack while we don't support subpixel AA
+    /// on transparent stacking contexts.
+    allow_subpixel_aa: bool,
+
+    /// CSS transform-style property.
+    transform_style: TransformStyle,
+
+    /// The primitive index for the root Picture primitive
+    /// that this stacking context is mapped to.
+    pic_prim_index: PrimitiveIndex,
 }
 
 #[derive(Clone, Copy)]
 pub struct FrameBuilderConfig {
     pub enable_scrollbars: bool,
     pub default_font_render_mode: FontRenderMode,
     pub debug: bool,
 }
@@ -106,52 +95,49 @@ impl HitTestingItem {
 pub struct HitTestingRun(Vec<HitTestingItem>, ClipAndScrollInfo);
 
 /// A builder structure for `RendererFrame`
 pub struct FrameBuilder {
     screen_size: DeviceUintSize,
     background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     pub clip_store: ClipStore,
-    cmds: Vec<PrimitiveRunCmd>,
     hit_testing_runs: Vec<HitTestingRun>,
     pub config: FrameBuilderConfig,
 
-    stacking_context_store: Vec<StackingContext>,
-
     // A stack of the current shadow primitives.
     // The sub-Vec stores a buffer of fast-path primitives to be appended on pop.
     shadow_prim_stack: Vec<(PrimitiveIndex, Vec<(PrimitiveIndex, ClipAndScrollInfo)>)>,
     // If we're doing any fast-path shadows, we buffer the "real"
     // content here, to be appended when the shadow stack is empty.
     pending_shadow_contents: Vec<(PrimitiveIndex, ClipAndScrollInfo, LayerPrimitiveInfo)>,
 
     scrollbar_prims: Vec<ScrollbarPrimitive>,
 
     /// A stack of scroll nodes used during display list processing to properly
     /// parent new scroll nodes.
     reference_frame_stack: Vec<ClipId>,
 
-    /// A stack of stacking contexts used for creating ClipScrollGroups as
-    /// primitives are added to the frame.
-    stacking_context_stack: Vec<StackingContextIndex>,
+    /// A stack of the current pictures, used during scene building.
+    pub picture_stack: Vec<PrimitiveIndex>,
 
-    /// Whether or not we've pushed a root stacking context for the current pipeline.
-    has_root_stacking_context: bool,
+    /// A temporary stack of stacking context properties, used only
+    /// during scene building.
+    sc_stack: Vec<StackingContext>,
 }
 
 pub struct PrimitiveContext<'a> {
     pub device_pixel_ratio: f32,
     pub display_list: &'a BuiltDisplayList,
     pub clip_node: &'a ClipScrollNode,
     pub scroll_node: &'a ClipScrollNode,
 }
 
 impl<'a> PrimitiveContext<'a> {
-    fn new(
+    pub fn new(
         device_pixel_ratio: f32,
         display_list: &'a BuiltDisplayList,
         clip_node: &'a ClipScrollNode,
         scroll_node: &'a ClipScrollNode,
     ) -> Self {
         PrimitiveContext {
             device_pixel_ratio,
             display_list,
@@ -165,46 +151,42 @@ impl FrameBuilder {
     pub fn new(
         previous: Option<Self>,
         screen_size: DeviceUintSize,
         background_color: Option<ColorF>,
         config: FrameBuilderConfig,
     ) -> Self {
         match previous {
             Some(prev) => FrameBuilder {
-                stacking_context_store: recycle_vec(prev.stacking_context_store),
-                cmds: recycle_vec(prev.cmds),
                 hit_testing_runs: recycle_vec(prev.hit_testing_runs),
                 shadow_prim_stack: recycle_vec(prev.shadow_prim_stack),
                 pending_shadow_contents: recycle_vec(prev.pending_shadow_contents),
                 scrollbar_prims: recycle_vec(prev.scrollbar_prims),
                 reference_frame_stack: recycle_vec(prev.reference_frame_stack),
-                stacking_context_stack: recycle_vec(prev.stacking_context_stack),
+                picture_stack: recycle_vec(prev.picture_stack),
+                sc_stack: recycle_vec(prev.sc_stack),
                 prim_store: prev.prim_store.recycle(),
                 clip_store: prev.clip_store.recycle(),
                 screen_size,
                 background_color,
                 config,
-                has_root_stacking_context: false,
             },
             None => FrameBuilder {
-                stacking_context_store: Vec::new(),
-                cmds: Vec::new(),
                 hit_testing_runs: Vec::new(),
                 shadow_prim_stack: Vec::new(),
                 pending_shadow_contents: Vec::new(),
                 scrollbar_prims: Vec::new(),
                 reference_frame_stack: Vec::new(),
-                stacking_context_stack: Vec::new(),
+                picture_stack: Vec::new(),
+                sc_stack: Vec::new(),
                 prim_store: PrimitiveStore::new(),
                 clip_store: ClipStore::new(),
                 screen_size,
                 background_color,
                 config,
-                has_root_stacking_context: false,
             },
         }
     }
 
     /// Create a primitive and add it to the prim store. This method doesn't
     /// add the primitive to the draw list, so can be used for creating
     /// sub-primitives.
     pub fn create_primitive(
@@ -217,21 +199,23 @@ impl FrameBuilder {
             clip_sources.push(ClipSource::Rectangle(main));
             clip_sources.push(ClipSource::RoundedRectangle(
                 region.rect,
                 region.radii,
                 region.mode,
             ));
         }
 
+        let stacking_context = self.sc_stack.last().expect("bug: no stacking context!");
+
         let clip_sources = self.clip_store.insert(ClipSources::new(clip_sources));
         let prim_index = self.prim_store.add_primitive(
             &info.rect,
             &info.local_clip.clip_rect(),
-            info.is_backface_visible,
+            info.is_backface_visible && stacking_context.is_backface_visible,
             clip_sources,
             info.tag,
             container,
         );
 
         prim_index
     }
 
@@ -259,36 +243,26 @@ impl FrameBuilder {
     }
 
     /// Add an already created primitive to the draw lists.
     pub fn add_primitive_to_draw_list(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ClipAndScrollInfo,
     ) {
-        match self.cmds.last_mut().unwrap() {
-            &mut PrimitiveRunCmd::PrimitiveRun(
-                ref mut run,
-            ) => if run.clip_and_scroll == clip_and_scroll &&
-                run.base_prim_index.0 + run.count == prim_index.0
-            {
-                run.count += 1;
-                return;
-            },
-            &mut PrimitiveRunCmd::PushStackingContext(..) |
-            &mut PrimitiveRunCmd::PopStackingContext => {}
-        }
-
-        let run = PrimitiveRun {
-            base_prim_index: prim_index,
-            count: 1,
-            clip_and_scroll,
-        };
-
-        self.cmds.push(PrimitiveRunCmd::PrimitiveRun(run));
+        // Add primitive to the top-most Picture on the stack.
+        // TODO(gw): Let's consider removing the extra indirection
+        //           needed to get a specific primitive index...
+        let pic_prim_index = self.picture_stack.last().unwrap();
+        let metadata = &self.prim_store.cpu_metadata[pic_prim_index.0];
+        let pic = &mut self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
+        pic.add_primitive(
+            prim_index,
+            clip_and_scroll
+        );
     }
 
     /// Convenience interface that creates a primitive entry and adds it
     /// to the draw list.
     pub fn add_primitive(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
@@ -297,88 +271,309 @@ impl FrameBuilder {
     ) -> PrimitiveIndex {
         self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
         let prim_index = self.create_primitive(info, clip_sources, container);
 
         self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
         prim_index
     }
 
-    pub fn notify_waiting_for_root_stacking_context(&mut self) {
-        self.has_root_stacking_context = false;
-    }
-
     pub fn push_stacking_context(
         &mut self,
-        reference_frame_offset: &LayerVector2D,
         pipeline_id: PipelineId,
         composite_ops: CompositeOps,
         transform_style: TransformStyle,
         is_backface_visible: bool,
         is_pipeline_root: bool,
+        clip_and_scroll: ClipAndScrollInfo,
+        output_pipelines: &FastHashSet<PipelineId>,
     ) {
-        if let Some(parent_index) = self.stacking_context_stack.last() {
-            let parent_is_root = self.stacking_context_store[parent_index.0].is_page_root;
+        // Construct the necessary set of Picture primitives
+        // to draw this stacking context.
+        let current_reference_frame_id = self.current_reference_frame_id();
+
+        // An arbitrarily large clip rect. For now, we don't
+        // specify a clip specific to the stacking context.
+        // However, now that they are represented as Picture
+        // primitives, we can apply any kind of clip mask
+        // to them, as for a normal primitive. This is needed
+        // to correctly handle some CSS cases (see #1957).
+        let max_clip = LayerRect::new(
+            LayerPoint::new(-MAX_CLIP, -MAX_CLIP),
+            LayerSize::new(2.0 * MAX_CLIP, 2.0 * MAX_CLIP),
+        );
+
+        // If there is no root picture, create one for the main framebuffer.
+        if self.sc_stack.is_empty() {
+            // Should be no pictures at all if the stack is empty...
+            debug_assert!(self.prim_store.cpu_pictures.is_empty());
+            debug_assert_eq!(transform_style, TransformStyle::Flat);
+
+            // This picture stores primitive runs for items on the
+            // main framebuffer.
+            let pic = PicturePrimitive::new_image(
+                None,
+                false,
+                pipeline_id,
+                current_reference_frame_id,
+                None,
+            );
+
+            // No clip sources needed for the main framebuffer.
+            let clip_sources = self.clip_store.insert(ClipSources::new(Vec::new()));
 
-            if composite_ops.mix_blend_mode.is_some() && !parent_is_root {
-                // the parent stacking context of a stacking context with mix-blend-mode
-                // must be drawn with a transparent background, unless the parent stacking context
-                // is the root of the page
-                let isolation = &mut self.stacking_context_store[parent_index.0].isolation;
-                if *isolation != ContextIsolation::None {
-                    error!(
-                        "Isolation conflict detected on {:?}: {:?}",
-                        parent_index,
-                        *isolation
-                    );
+            // Add root picture primitive. The provided layer rect
+            // is zero, because we don't yet know the size of the
+            // picture. Instead, this is calculated recursively
+            // when we cull primitives.
+            let prim_index = self.prim_store.add_primitive(
+                &LayerRect::zero(),
+                &max_clip,
+                true,
+                clip_sources,
+                None,
+                PrimitiveContainer::Picture(pic),
+            );
+
+            self.picture_stack.push(prim_index);
+        } else if composite_ops.mix_blend_mode.is_some() && self.sc_stack.len() > 2 {
+            // If we have a mix-blend-mode, and we aren't the primary framebuffer,
+            // the stacking context needs to be isolated to blend correctly as per
+            // the CSS spec.
+            // TODO(gw): The way we detect not being the primary framebuffer (len > 2)
+            //           is hacky and depends on how we create a root stacking context
+            //           during flattening.
+            let current_pic_prim_index = self.picture_stack.last().unwrap();
+            let pic_cpu_prim_index = self.prim_store.cpu_metadata[current_pic_prim_index.0].cpu_prim_index;
+            let parent_pic = &mut self.prim_store.cpu_pictures[pic_cpu_prim_index.0];
+
+            match parent_pic.kind {
+                PictureKind::Image { ref mut composite_mode, .. } => {
+                    // If not already isolated for some other reason,
+                    // mark this picture as isolated.
+                    if composite_mode.is_none() {
+                        *composite_mode = Some(PictureCompositeMode::Blit);
+                    }
                 }
-                *isolation = ContextIsolation::Full;
+                PictureKind::TextShadow { .. } |
+                PictureKind::BoxShadow { .. } => {
+                    panic!("bug: text/box pictures invalid here");
+                }
             }
         }
 
-        let stacking_context_index = StackingContextIndex(self.stacking_context_store.len());
-        let reference_frame_id = self.current_reference_frame_id();
-        self.stacking_context_store.push(StackingContext::new(
+        // Get the transform-style of the parent stacking context,
+        // which determines if we *might* need to draw this on
+        // an intermediate surface for plane splitting purposes.
+        let parent_transform_style = match self.sc_stack.last() {
+            Some(sc) => sc.transform_style,
+            None => TransformStyle::Flat,
+        };
+
+        // If either the parent or this stacking context is preserve-3d
+        // then we are in a 3D context.
+        let is_in_3d_context = composite_ops.count() == 0 &&
+                               (parent_transform_style == TransformStyle::Preserve3D ||
+                                transform_style == TransformStyle::Preserve3D);
+
+        // TODO(gw): For now, we don't handle filters and mix-blend-mode when there
+        //           is a 3D rendering context. We can easily do this in the future
+        //           by creating a chain of pictures for the effects, and ensuring
+        //           that the last composited picture is what's used as the input to
+        //           the plane splitting code.
+        let mut parent_pic_prim_index = if is_in_3d_context {
+            // If we're in a 3D context, we will parent the picture
+            // to the first stacking context we find in the stack that
+            // is transform-style: flat. This follows the spec
+            // by hoisting these items out into the same 3D context
+            // for plane splitting.
+            self.sc_stack
+                .iter()
+                .rev()
+                .find(|sc| sc.transform_style == TransformStyle::Flat)
+                .map(|sc| sc.pic_prim_index)
+                .unwrap()
+        } else {
+            *self.picture_stack.last().unwrap()
+        };
+
+        // For each filter, create a new image with that composite mode.
+        for filter in &composite_ops.filters {
+            let src_prim = PicturePrimitive::new_image(
+                Some(PictureCompositeMode::Filter(*filter)),
+                false,
+                pipeline_id,
+                current_reference_frame_id,
+                None,
+            );
+            let src_clip_sources = self.clip_store.insert(ClipSources::new(Vec::new()));
+
+            let src_prim_index = self.prim_store.add_primitive(
+                &LayerRect::zero(),
+                &max_clip,
+                is_backface_visible,
+                src_clip_sources,
+                None,
+                PrimitiveContainer::Picture(src_prim),
+            );
+
+            let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
+            parent_pic_prim_index = src_prim_index;
+            let pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
+            pic.add_primitive(
+                src_prim_index,
+                clip_and_scroll,
+            );
+
+            self.picture_stack.push(src_prim_index);
+        }
+
+        // Same for mix-blend-mode.
+        if let Some(mix_blend_mode) = composite_ops.mix_blend_mode {
+            let src_prim = PicturePrimitive::new_image(
+                Some(PictureCompositeMode::MixBlend(mix_blend_mode)),
+                false,
+                pipeline_id,
+                current_reference_frame_id,
+                None,
+            );
+            let src_clip_sources = self.clip_store.insert(ClipSources::new(Vec::new()));
+
+            let src_prim_index = self.prim_store.add_primitive(
+                &LayerRect::zero(),
+                &max_clip,
+                is_backface_visible,
+                src_clip_sources,
+                None,
+                PrimitiveContainer::Picture(src_prim),
+            );
+
+            let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
+            parent_pic_prim_index = src_prim_index;
+            let pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
+            pic.add_primitive(
+                src_prim_index,
+                clip_and_scroll,
+            );
+
+            self.picture_stack.push(src_prim_index);
+        }
+
+        // By default, this picture will be collapsed into
+        // the owning target.
+        let mut composite_mode = None;
+        let mut frame_output_pipeline_id = None;
+
+        // If this stacking context is the root of a pipeline, and the caller
+        // has requested it as an output frame, create a render task to isolate it.
+        if is_pipeline_root && output_pipelines.contains(&pipeline_id) {
+            composite_mode = Some(PictureCompositeMode::Blit);
+            frame_output_pipeline_id = Some(pipeline_id);
+        }
+
+        if is_in_3d_context {
+            // TODO(gw): For now, as soon as this picture is in
+            //           a 3D context, we draw it to an intermediate
+            //           surface and apply plane splitting. However,
+            //           there is a large optimization opportunity here.
+            //           During culling, we can check if there is actually
+            //           perspective present, and skip the plane splitting
+            //           completely when that is not the case.
+            composite_mode = Some(PictureCompositeMode::Blit);
+        }
+
+        // Add picture for this actual stacking context contents to render into.
+        let sc_prim = PicturePrimitive::new_image(
+            composite_mode,
+            is_in_3d_context,
             pipeline_id,
-            *reference_frame_offset,
-            !self.has_root_stacking_context,
-            is_pipeline_root,
-            reference_frame_id,
-            transform_style,
+            current_reference_frame_id,
+            frame_output_pipeline_id,
+        );
+
+        let sc_clip_sources = self.clip_store.insert(ClipSources::new(Vec::new()));
+        let sc_prim_index = self.prim_store.add_primitive(
+            &LayerRect::zero(),
+            &max_clip,
+            is_backface_visible,
+            sc_clip_sources,
+            None,
+            PrimitiveContainer::Picture(sc_prim),
+        );
+
+        let pic_prim_index = self.prim_store.cpu_metadata[parent_pic_prim_index.0].cpu_prim_index;
+        let sc_pic = &mut self.prim_store.cpu_pictures[pic_prim_index.0];
+        sc_pic.add_primitive(
+            sc_prim_index,
+            clip_and_scroll,
+        );
+
+        // Add this as the top-most picture for primitives to be added to.
+        self.picture_stack.push(sc_prim_index);
+
+        // TODO(gw): This is super conservative. We can expand on this a lot
+        //           once all the picture code is in place and landed.
+        let allow_subpixel_aa = composite_ops.count() == 0 &&
+                                transform_style == TransformStyle::Flat;
+
+        // Push the SC onto the stack, so we know how to handle things in
+        // pop_stacking_context.
+        let sc = StackingContext {
             composite_ops,
             is_backface_visible,
-        ));
-        self.has_root_stacking_context = true;
-        self.cmds
-            .push(PrimitiveRunCmd::PushStackingContext(stacking_context_index));
-        self.stacking_context_stack.push(stacking_context_index);
+            pipeline_id,
+            allow_subpixel_aa,
+            transform_style,
+            // TODO(gw): This is not right when filters are present (but we
+            //           don't handle that right now, per comment above).
+            pic_prim_index: sc_prim_index,
+        };
+
+        self.sc_stack.push(sc);
     }
 
     pub fn pop_stacking_context(&mut self) {
-        self.cmds.push(PrimitiveRunCmd::PopStackingContext);
-        self.stacking_context_stack.pop();
+        let sc = self.sc_stack.pop().unwrap();
+
+        // Remove the picture for this stacking context's contents.
+        self.picture_stack.pop().expect("bug");
+
+        // Remove the picture for any filter/mix-blend-mode effects.
+        for _ in 0 .. sc.composite_ops.count() {
+            self.picture_stack.pop().expect("bug: mismatched picture stack");
+        }
+
+        // By the time the stacking context stack is empty, we should
+        // also have cleared the picture stack.
+        if self.sc_stack.is_empty() {
+            self.picture_stack.pop().expect("bug: picture stack invalid");
+            debug_assert!(self.picture_stack.is_empty());
+        }
+
         assert!(
             self.shadow_prim_stack.is_empty(),
             "Found unpopped text shadows when popping stacking context!"
         );
     }
 
     pub fn push_reference_frame(
         &mut self,
         parent_id: Option<ClipId>,
         pipeline_id: PipelineId,
         rect: &LayerRect,
-        transform: &LayerToScrollTransform,
+        source_transform: Option<PropertyBinding<LayoutTransform>>,
+        source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayerVector2D,
         root_for_pipeline: bool,
         clip_scroll_tree: &mut ClipScrollTree,
     ) -> ClipId {
         let new_id = clip_scroll_tree.add_reference_frame(
             rect,
-            transform,
+            source_transform,
+            source_perspective,
             origin_in_parent_reference_frame,
             pipeline_id,
             parent_id,
             root_for_pipeline,
         );
         self.reference_frame_stack.push(new_id);
         new_id
     }
@@ -412,17 +607,17 @@ impl FrameBuilder {
         let viewport_clip = LayerRect::new(
             LayerPoint::new(-viewport_offset.x, -viewport_offset.y),
             LayerSize::new(clip_size.width, clip_size.height),
         );
 
         let root_id = clip_scroll_tree.root_reference_frame_id();
         if let Some(root_node) = clip_scroll_tree.nodes.get_mut(&root_id) {
             if let NodeType::ReferenceFrame(ref mut info) = root_node.node_type {
-                info.transform = LayerToScrollTransform::create_translation(
+                info.resolved_transform = LayerToScrollTransform::create_translation(
                     viewport_offset.x,
                     viewport_offset.y,
                     0.0,
                 );
             }
             root_node.local_clip_rect = viewport_clip;
         }
 
@@ -435,22 +630,22 @@ impl FrameBuilder {
     pub fn push_root(
         &mut self,
         pipeline_id: PipelineId,
         viewport_size: &LayerSize,
         content_size: &LayerSize,
         clip_scroll_tree: &mut ClipScrollTree,
     ) -> ClipId {
         let viewport_rect = LayerRect::new(LayerPoint::zero(), *viewport_size);
-        let identity = &LayerToScrollTransform::identity();
         self.push_reference_frame(
             None,
             pipeline_id,
             &viewport_rect,
-            identity,
+            None,
+            None,
             LayerVector2D::zero(),
             true,
             clip_scroll_tree,
         );
 
         let topmost_scrolling_node_id = ClipId::root_scroll_node(pipeline_id);
         clip_scroll_tree.topmost_scrolling_node_id = topmost_scrolling_node_id;
 
@@ -511,17 +706,18 @@ impl FrameBuilder {
     }
 
     pub fn push_shadow(
         &mut self,
         shadow: Shadow,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
     ) {
-        let prim = PicturePrimitive::new_text_shadow(shadow);
+        let pipeline_id = self.sc_stack.last().unwrap().pipeline_id;
+        let prim = PicturePrimitive::new_text_shadow(shadow, pipeline_id);
 
         // Create an empty shadow primitive. Insert it into
         // the draw lists immediately so that it will be drawn
         // before any visual text elements that are added as
         // part of this shadow context.
         let prim_index = self.create_primitive(
             info,
             Vec::new(),
@@ -532,27 +728,17 @@ impl FrameBuilder {
         self.shadow_prim_stack.push((prim_index, pending));
     }
 
     pub fn pop_all_shadows(&mut self) {
         assert!(self.shadow_prim_stack.len() > 0, "popped shadows, but none were present");
 
         // Borrowcheck dance
         let mut shadows = mem::replace(&mut self.shadow_prim_stack, Vec::new());
-        for (prim_index, pending_primitives) in shadows.drain(..) {
-            {
-                // By now, the local rect of the text shadow has been calculated. It
-                // is calculated as the items in the shadow are added. It's now
-                // safe to offset the local rect by the offset of the shadow, which
-                // is then used when blitting the shadow to the final location.
-                let metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
-                let prim = &mut self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
-                metadata.local_rect = prim.build();
-            }
-
+        for (_, pending_primitives) in shadows.drain(..) {
             // Push any fast-path shadows now
             for (prim_index, clip_and_scroll) in pending_primitives {
                 self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
             }
         }
 
         let mut pending_primitives = mem::replace(&mut self.pending_shadow_contents, Vec::new());
         for (prim_index, clip_and_scroll, info) in pending_primitives.drain(..) {
@@ -616,17 +802,17 @@ impl FrameBuilder {
             orientation,
         };
 
         let mut fast_shadow_prims = Vec::new();
         for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
             let picture = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             match picture.kind {
-                PictureKind::TextShadow { offset, color, blur_radius } if blur_radius == 0.0 => {
+                PictureKind::TextShadow { offset, color, blur_radius, .. } if blur_radius == 0.0 => {
                     fast_shadow_prims.push((idx, offset, color));
                 }
                 _ => {}
             }
         }
 
         for (idx, shadow_offset, shadow_color) in fast_shadow_prims {
             let mut line = line.clone();
@@ -662,17 +848,16 @@ impl FrameBuilder {
             let picture =
                 &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
 
             match picture.kind {
                 // Only run real blurs here (fast path zero blurs are handled above).
                 PictureKind::TextShadow { blur_radius, .. } if blur_radius > 0.0 => {
                     picture.add_primitive(
                         prim_index,
-                        &info.rect,
                         clip_and_scroll,
                     );
                 }
                 _ => {}
             }
         }
     }
 
@@ -1048,17 +1233,16 @@ impl FrameBuilder {
         run_offset: LayoutVector2D,
         info: &LayerPrimitiveInfo,
         font: &FontInstance,
         text_color: &ColorF,
         glyph_range: ItemRange<GlyphInstance>,
         glyph_count: usize,
         glyph_options: Option<GlyphOptions>,
     ) {
-        let original_rect = info.rect;
         // Trivial early out checks
         if font.size.0 <= 0 {
             return;
         }
 
         // Sanity check - anything with glyphs bigger than this
         // is probably going to consume too much memory to render
         // efficiently anyway. This is specifically to work around
@@ -1077,23 +1261,22 @@ impl FrameBuilder {
             .limit_by(font.render_mode);
         if let Some(options) = glyph_options {
             render_mode = render_mode.limit_by(options.render_mode);
         }
 
         // There are some conditions under which we can't use
         // subpixel text rendering, even if enabled.
         if render_mode == FontRenderMode::Subpixel {
-            // text on a stacking context that has filters
+            // text on a picture that has filters
             // (e.g. opacity) can't use sub-pixel.
             // TODO(gw): It's possible we can relax this in
             //           the future, if we modify the way
             //           we handle subpixel blending.
-            if let Some(sc_index) = self.stacking_context_stack.last() {
-                let stacking_context = &self.stacking_context_store[sc_index.0];
+            if let Some(ref stacking_context) = self.sc_stack.last() {
                 if !stacking_context.allow_subpixel_aa {
                     render_mode = FontRenderMode::Alpha;
                 }
             }
         }
 
         let prim_font = FontInstance::new(
             font.font_key,
@@ -1123,17 +1306,17 @@ impl FrameBuilder {
         // *before* the visual text primitive in order to get the correct paint
         // order. Store them in a Vec first to work around borrowck issues.
         // TODO(gw): Refactor to avoid having to store them in a Vec first.
         let mut fast_shadow_prims = Vec::new();
         for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
             let picture_prim = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             match picture_prim.kind {
-                PictureKind::TextShadow { offset, color, blur_radius } if blur_radius == 0.0 => {
+                PictureKind::TextShadow { offset, color, blur_radius, .. } if blur_radius == 0.0 => {
                     let mut text_prim = prim.clone();
                     text_prim.font.color = color.into();
                     text_prim.offset += offset;
                     fast_shadow_prims.push((idx, text_prim));
                 }
                 _ => {}
             }
         }
@@ -1181,17 +1364,16 @@ impl FrameBuilder {
             let picture =
                 &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
 
             match picture.kind {
                 // Only run real blurs here (fast path zero blurs are handled above).
                 PictureKind::TextShadow { blur_radius, .. } if blur_radius > 0.0 => {
                     picture.add_primitive(
                         prim_index,
-                        &original_rect,
                         clip_and_scroll,
                     );
                 }
                 _ => {}
             }
         }
     }
 
@@ -1203,21 +1385,31 @@ impl FrameBuilder {
         tile_spacing: &LayerSize,
         sub_rect: Option<TexelRect>,
         image_key: ImageKey,
         image_rendering: ImageRendering,
         tile: Option<TileOffset>,
     ) {
         let sub_rect_block = sub_rect.unwrap_or(TexelRect::invalid()).into();
 
+        // If the tile spacing is the same as the rect size,
+        // then it is effectively zero. We use this later on
+        // in prim_store to detect if an image can be considered
+        // opaque.
+        let tile_spacing = if *tile_spacing == info.rect.size {
+            LayerSize::zero()
+        } else {
+            *tile_spacing
+        };
+
         let prim_cpu = ImagePrimitiveCpu {
             image_key,
             image_rendering,
             tile_offset: tile,
-            tile_spacing: *tile_spacing,
+            tile_spacing,
             gpu_blocks: [
                 [
                     stretch_size.width,
                     stretch_size.height,
                     tile_spacing.width,
                     tile_spacing.height,
                 ].into(),
                 sub_rect_block,
@@ -1258,30 +1450,16 @@ impl FrameBuilder {
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::YuvImage(prim_cpu),
         );
     }
 
-    fn handle_push_stacking_context(&mut self, stacking_context_index: StackingContextIndex) {
-        self.stacking_context_stack.push(stacking_context_index);
-
-        // Reset bounding rect to zero. We will calculate it as we collect primitives
-        // from various scroll layers. In handle_pop_stacking_context , we use this to
-        // calculate the device bounding rect. In the future, we could cache this during
-        // the initial adding of items for the common case (where there is only a single
-        // scroll layer for items in a stacking context).
-        let stacking_context =
-            &mut self.stacking_context_store[stacking_context_index.0];
-        stacking_context.screen_bounds = DeviceIntRect::zero();
-        stacking_context.isolated_items_bounds = LayerRect::zero();
-    }
-
     pub fn hit_test(
         &self,
         clip_scroll_tree: &ClipScrollTree,
         pipeline_id: Option<PipelineId>,
         point: WorldPoint,
         flags: HitTestFlags
     ) -> HitTestResult {
         let point = if flags.contains(HitTestFlags::POINT_RELATIVE_TO_PIPELINE_VIEWPORT) {
@@ -1341,191 +1519,80 @@ impl FrameBuilder {
                 }
             }
         }
 
         result.items.dedup();
         return result;
     }
 
-
-    fn handle_primitive_run(
-        &mut self,
-        run: &PrimitiveRun,
-        render_tasks: &mut RenderTaskTree,
-        gpu_cache: &mut GpuCache,
-        resource_cache: &mut ResourceCache,
-        pipelines: &FastHashMap<PipelineId, ScenePipeline>,
-        clip_scroll_tree: &ClipScrollTree,
-        device_pixel_ratio: f32,
-        profile_counters: &mut FrameProfileCounters,
-    ) {
-        let stacking_context_index = *self.stacking_context_stack.last().unwrap();
-        let scroll_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
-        let clip_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.clip_node_id()];
-
-        if !clip_node.is_visible() {
-            debug!("{:?} of clipped out {:?}", run.base_prim_index, stacking_context_index);
-            return;
-        }
-
-        let stacking_context = &mut self.stacking_context_store[stacking_context_index.0];
-        let pipeline_id = {
-            if !stacking_context.can_contribute_to_scene() {
-                return;
-            }
-
-            // At least one primitive in this stacking context is visible, so the stacking
-            // context is visible.
-            stacking_context.is_visible = true;
-            stacking_context.pipeline_id
-        };
-
-        debug!(
-            "\t{:?} of {:?}",
-            run.base_prim_index,
-            stacking_context_index,
-        );
-
-        let display_list = &pipelines
-            .get(&pipeline_id)
-            .expect("No display list?")
-            .display_list;
-
-        if !stacking_context.is_backface_visible && scroll_node.world_content_transform.is_backface_visible() {
-            return;
-        }
-
-        let prim_context = PrimitiveContext::new(
-            device_pixel_ratio,
-            display_list,
-            clip_node,
-            scroll_node,
-        );
-
-        let result = self.prim_store.prepare_prim_run(
-            run,
-            &prim_context,
-            gpu_cache,
-            resource_cache,
-            render_tasks,
-            &mut self.clip_store,
-        );
-
-        if result.visible_primitives > 0 {
-            stacking_context.screen_bounds = stacking_context
-                .screen_bounds
-                .union(&result.device_rect);
-            stacking_context.isolated_items_bounds = stacking_context
-                .isolated_items_bounds
-                .union(&result.local_rect);
-            stacking_context.has_any_primitive = true;
-
-            profile_counters.visible_primitives.add(result.visible_primitives);
-        }
-    }
-
-    fn handle_pop_stacking_context(
-        &mut self,
-        screen_rect: &DeviceIntRect,
-        clip_scroll_tree: &ClipScrollTree) {
-        let stacking_context_index = self.stacking_context_stack.pop().unwrap();
-
-        let (bounding_rect, is_visible, is_preserve_3d, reference_id, reference_bounds) = {
-            let stacking_context =
-                &mut self.stacking_context_store[stacking_context_index.0];
-            if !stacking_context.has_any_primitive {
-                stacking_context.isolated_items_bounds = stacking_context.children_sc_bounds;
-            } else if stacking_context.isolation != ContextIsolation::Items {
-                stacking_context.isolated_items_bounds = stacking_context
-                    .isolated_items_bounds
-                    .union(&stacking_context.children_sc_bounds);
-            }
-            stacking_context.screen_bounds = stacking_context
-                .screen_bounds
-                .intersection(screen_rect)
-                .unwrap_or(DeviceIntRect::zero());
-            (
-                stacking_context.screen_bounds.clone(),
-                stacking_context.is_visible,
-                stacking_context.isolation == ContextIsolation::Items,
-                stacking_context.reference_frame_id,
-                stacking_context
-                    .isolated_items_bounds
-                    .translate(&stacking_context.reference_frame_offset),
-            )
-        };
-
-        if let Some(ref mut parent_index) = self.stacking_context_stack.last_mut() {
-            let parent = &mut self.stacking_context_store[parent_index.0];
-            parent.screen_bounds = parent.screen_bounds.union(&bounding_rect);
-            let child_bounds = reference_bounds.translate(&-parent.reference_frame_offset);
-            let frame_node = clip_scroll_tree
-                .nodes
-                .get(&reference_id)
-                .unwrap();
-            let local_transform = match frame_node.node_type {
-                NodeType::ReferenceFrame(ref info) => info.transform,
-                _ => LayerToScrollTransform::identity(),
-            };
-            let transformed_bounds = local_transform
-                .with_destination::<LayerPixel>()
-                .transform_rect(&child_bounds);
-            parent.children_sc_bounds = parent.children_sc_bounds.union(&transformed_bounds);
-            // add children local bounds only for non-item-isolated contexts
-            if !is_preserve_3d && parent.reference_frame_id == reference_id {
-                parent.isolated_items_bounds = parent.isolated_items_bounds.union(&child_bounds);
-            }
-            // Per-primitive stacking context visibility checks do not take into account
-            // visibility of child stacking contexts, so do that now.
-            parent.is_visible = parent.is_visible || is_visible;
-        }
-    }
-
     /// Compute the contribution (bounding rectangles, and resources) of layers and their
     /// primitives in screen space.
     fn build_layer_screen_rects_and_cull_layers(
         &mut self,
-        screen_rect: &DeviceIntRect,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_ratio: f32,
+        scene_properties: &SceneProperties,
     ) {
         profile_scope!("cull");
 
-        debug!("processing commands...");
-        let commands = mem::replace(&mut self.cmds, Vec::new());
-        for cmd in &commands {
-            match *cmd {
-                PrimitiveRunCmd::PushStackingContext(stacking_context_index) => {
-                    self.handle_push_stacking_context(stacking_context_index)
-                }
-                PrimitiveRunCmd::PrimitiveRun(ref run) => {
-                    self.handle_primitive_run(
-                        run,
-                        render_tasks,
-                        gpu_cache,
-                        resource_cache,
-                        pipelines,
-                        clip_scroll_tree,
-                        device_pixel_ratio,
-                        profile_counters,
-                    );
-                }
-                PrimitiveRunCmd::PopStackingContext => {
-                    self.handle_pop_stacking_context(screen_rect, clip_scroll_tree);
-                }
-            }
-        }
+        // The root picture is always the first one added.
+        let prim_run_cmds = mem::replace(&mut self.prim_store.cpu_pictures[0].runs, Vec::new());
+        let root_clip_scroll_node = &clip_scroll_tree.nodes[&clip_scroll_tree.root_reference_frame_id()];
+
+        let display_list = &pipelines
+            .get(&root_clip_scroll_node.pipeline_id)
+            .expect("No display list?")
+            .display_list;
+
+        let root_prim_context = PrimitiveContext::new(
+            device_pixel_ratio,
+            display_list,
+            root_clip_scroll_node,
+            root_clip_scroll_node,
+        );
+
+        let mut child_tasks = Vec::new();
+
+        self.prim_store.reset_prim_visibility();
 
-        mem::replace(&mut self.cmds, commands);
+        self.prim_store.prepare_prim_runs(
+            &prim_run_cmds,
+            root_clip_scroll_node.pipeline_id,
+            gpu_cache,
+            resource_cache,
+            render_tasks,
+            &mut self.clip_store,
+            clip_scroll_tree,
+            pipelines,
+            &root_prim_context,
+            true,
+            &mut child_tasks,
+            profile_counters,
+            None,
+            scene_properties,
+        );
+
+        let pic = &mut self.prim_store.cpu_pictures[0];
+        pic.runs = prim_run_cmds;
+
+        let root_render_task = RenderTask::new_alpha_batch(
+            DeviceIntPoint::zero(),
+            RenderTaskLocation::Fixed,
+            PrimitiveIndex(0),
+            None,
+            child_tasks,
+        );
+
+        pic.render_task_id = Some(render_tasks.add(root_render_task));
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
         static SCROLLBAR_PADDING: f32 = 8.0;
 
         for scrollbar_prim in &self.scrollbar_prims {
             let metadata = &mut self.prim_store.cpu_metadata[scrollbar_prim.prim_index.0];
             let scroll_frame = &clip_scroll_tree.nodes[&scrollbar_prim.clip_id];
@@ -1547,346 +1614,28 @@ impl FrameBuilder {
 
             metadata.local_rect.origin.x = frame_rect.origin.x + frame_rect.size.width -
                 (metadata.local_rect.size.width + SCROLLBAR_PADDING);
             metadata.local_rect.origin.y = util::lerp(min_y, max_y, amount_scrolled);
             metadata.local_clip_rect = metadata.local_rect;
         }
     }
 
-    fn build_render_task(
-        &mut self,
-        clip_scroll_tree: &ClipScrollTree,
-        gpu_cache: &mut GpuCache,
-        render_tasks: &mut RenderTaskTree,
-        output_pipelines: &FastHashSet<PipelineId>,
-        device_pixel_ratio: f32,
-    ) -> RenderTaskId {
-        profile_scope!("build_render_task");
-
-        let mut next_z = 0;
-        let mut sc_stack: Vec<StackingContextIndex> = Vec::new();
-        let mut current_task =
-            RenderTask::new_alpha_batch(DeviceIntPoint::zero(), RenderTaskLocation::Fixed, None);
-        // A stack of the alpha batcher tasks. We create them on the way down,
-        // and then actually populate with items and dependencies on the way up.
-        let mut alpha_task_stack = Vec::new();
-        // A map of "preserve-3d" contexts. We are baking these into render targets
-        // and only compositing once we are out of "preserve-3d" hierarchy.
-        // The stacking contexts that fall into this category are
-        //  - ones with `ContextIsolation::Items`, for their actual items to be backed
-        //  - immediate children of `ContextIsolation::Items`
-        let mut preserve_3d_map_stack: Vec<FastHashMap<StackingContextIndex, RenderTaskId>> =
-            Vec::new();
-        // The plane splitter stack, using a simple BSP tree.
-        let mut splitter_stack = Vec::new();
-
-        debug!("build_render_task()");
-
-        for cmd in &self.cmds {
-            match *cmd {
-                PrimitiveRunCmd::PushStackingContext(stacking_context_index) => {
-                    let parent_isolation = sc_stack
-                        .last()
-                        .map(|index| self.stacking_context_store[index.0].isolation);
-                    let stacking_context = &self.stacking_context_store[stacking_context_index.0];
-                    sc_stack.push(stacking_context_index);
-
-                    if !stacking_context.is_visible {
-                        continue;
-                    }
-
-                    debug!(
-                        "\tpush {:?} {:?}",
-                        stacking_context_index,
-                        stacking_context.isolation
-                    );
-
-                    let stacking_context_rect = &stacking_context.screen_bounds;
-                    let composite_count = stacking_context.composite_ops.count();
-
-                    // If this stacking context if the root of a pipeline, and the caller
-                    // has requested it as an output frame, create a render task to isolate it.
-                    if stacking_context.is_pipeline_root &&
-                        output_pipelines.contains(&stacking_context.pipeline_id)
-                    {
-                        alpha_task_stack.push(current_task);
-                        current_task = RenderTask::new_dynamic_alpha_batch(
-                            stacking_context_rect,
-                            Some(stacking_context.pipeline_id),
-                        );
-                    }
-
-                    if stacking_context.isolation == ContextIsolation::Full && composite_count == 0
-                    {
-                        alpha_task_stack.push(current_task);
-                        current_task =
-                            RenderTask::new_dynamic_alpha_batch(stacking_context_rect, None);
-                    }
-
-                    if parent_isolation == Some(ContextIsolation::Items) ||
-                        stacking_context.isolation == ContextIsolation::Items
-                    {
-                        if parent_isolation != Some(ContextIsolation::Items) {
-                            splitter_stack.push(BspSplitter::new());
-                            preserve_3d_map_stack.push(FastHashMap::default());
-                        }
-                        alpha_task_stack.push(current_task);
-                        current_task =
-                            RenderTask::new_dynamic_alpha_batch(stacking_context_rect, None);
-                        //Note: technically, we shouldn't make a new alpha task for "preserve-3d" contexts
-                        // that have no child items (only other stacking contexts). However, we don't know if
-                        // there are any items at this time (in `PushStackingContext`).
-                        //Note: the reason we add the polygon for splitting during `Push*` as opposed to `Pop*`
-                        // is because we need to preserve the order of drawing for planes that match together.
-                        let frame_node = clip_scroll_tree
-                            .nodes
-                            .get(&stacking_context.reference_frame_id)
-                            .unwrap();
-                        let sc_polygon =
-                            make_polygon(stacking_context, frame_node, stacking_context_index.0);
-                        debug!(
-                            "\tsplitter[{}]: add {:?} -> {:?} with bounds {:?}",
-                            splitter_stack.len(),
-                            stacking_context_index,
-                            sc_polygon,
-                            stacking_context.isolated_items_bounds
-                        );
-                        splitter_stack.last_mut().unwrap().add(sc_polygon);
-                    }
-
-                    for _ in 0 .. composite_count {
-                        alpha_task_stack.push(current_task);
-                        current_task =
-                            RenderTask::new_dynamic_alpha_batch(stacking_context_rect, None);
-                    }
-                }
-                PrimitiveRunCmd::PopStackingContext => {
-                    let stacking_context_index = sc_stack.pop().unwrap();
-                    let stacking_context = &self.stacking_context_store[stacking_context_index.0];
-                    let composite_count = stacking_context.composite_ops.count();
-
-                    if !stacking_context.is_visible {
-                        continue;
-                    }
-
-                    debug!("\tpop {:?}", stacking_context_index);
-                    let parent_isolation = sc_stack
-                        .last()
-                        .map(|index| self.stacking_context_store[index.0].isolation);
-
-                    if stacking_context.isolation == ContextIsolation::Full && composite_count == 0
-                    {
-                        let mut prev_task = alpha_task_stack.pop().unwrap();
-                        let screen_origin = current_task.as_alpha_batch().screen_origin;
-                        let current_task_size = current_task.get_dynamic_size();
-                        let current_task_id = render_tasks.add(current_task);
-                        let item = AlphaRenderItem::HardwareComposite(
-                            stacking_context_index,
-                            current_task_id,
-                            HardwareCompositeOp::PremultipliedAlpha,
-                            screen_origin,
-                            next_z,
-                            current_task_size,
-                        );
-                        next_z += 1;
-                        prev_task.as_alpha_batch_mut().items.push(item);
-                        prev_task.children.push(current_task_id);
-                        current_task = prev_task;
-                    }
-
-                    for filter in &stacking_context.composite_ops.filters {
-                        let mut prev_task = alpha_task_stack.pop().unwrap();
-                        let screen_origin = current_task.as_alpha_batch().screen_origin;
-                        let current_task_id = render_tasks.add(current_task);
-                        match *filter {
-                            FilterOp::Blur(blur_radius) => {
-                                let blur_radius = device_length(blur_radius, device_pixel_ratio);
-                                let blur_std_deviation = blur_radius.0 as f32;
-                                let inflate_size = blur_std_deviation * BLUR_SAMPLE_SCALE;
-                                render_tasks.get_mut(current_task_id)
-                                            .inflate(inflate_size as i32);
-                                let blur_render_task = RenderTask::new_blur(
-                                    blur_std_deviation,
-                                    current_task_id,
-                                    render_tasks,
-                                    RenderTargetKind::Color,
-                                    &[],
-                                    ClearMode::Transparent,
-                                    PremultipliedColorF::TRANSPARENT,
-                                );
-                                let blur_render_task_id = render_tasks.add(blur_render_task);
-                                let item = AlphaRenderItem::HardwareComposite(
-                                    stacking_context_index,
-                                    blur_render_task_id,
-                                    HardwareCompositeOp::PremultipliedAlpha,
-                                    DeviceIntPoint::new(
-                                        screen_origin.x - inflate_size as i32,
-                                        screen_origin.y - inflate_size as i32,
-                                    ),
-                                    next_z,
-                                    render_tasks.get(current_task_id).get_dynamic_size(),
-                                );
-                                prev_task.as_alpha_batch_mut().items.push(item);
-                                prev_task.children.push(blur_render_task_id);
-                                current_task = prev_task;
-                            }
-                            _ => {
-                                let item = AlphaRenderItem::Blend(
-                                    stacking_context_index,
-                                    current_task_id,
-                                    *filter,
-                                    next_z,
-                                );
-                                prev_task.as_alpha_batch_mut().items.push(item);
-                                prev_task.children.push(current_task_id);
-                                current_task = prev_task;
-                            }
-                        }
-                        next_z += 1;
-                    }
-
-                    if let Some(mix_blend_mode) = stacking_context.composite_ops.mix_blend_mode {
-                        let backdrop_task =
-                            RenderTask::new_readback(stacking_context.screen_bounds);
-                        let source_task_id = render_tasks.add(current_task);
-                        let backdrop_task_id = render_tasks.add(backdrop_task);
-
-                        let mut prev_task = alpha_task_stack.pop().unwrap();
-                        let item = AlphaRenderItem::Composite(
-                            stacking_context_index,
-                            source_task_id,
-                            backdrop_task_id,
-                            mix_blend_mode,
-                            next_z,
-                        );
-                        next_z += 1;
-                        prev_task.as_alpha_batch_mut().items.push(item);
-                        prev_task.children.push(source_task_id);
-                        prev_task.children.push(backdrop_task_id);
-                        current_task = prev_task;
-                    }
-
-                    if parent_isolation == Some(ContextIsolation::Items) ||
-                        stacking_context.isolation == ContextIsolation::Items
-                    {
-                        //Note: we don't register the dependent tasks here. It's only done
-                        // when we are out of the `preserve-3d` branch (see the code below),
-                        // since this is only where the parent task is known.
-                        let current_task_id = render_tasks.add(current_task);
-                        preserve_3d_map_stack
-                            .last_mut()
-                            .unwrap()
-                            .insert(stacking_context_index, current_task_id);
-                        current_task = alpha_task_stack.pop().unwrap();
-                    }
-
-                    if parent_isolation != Some(ContextIsolation::Items) &&
-                        stacking_context.isolation == ContextIsolation::Items
-                    {
-                        debug!("\tsplitter[{}]: flush", splitter_stack.len());
-                        let mut splitter = splitter_stack.pop().unwrap();
-                        // Flush the accumulated plane splits onto the task tree.
-                        // Notice how this is done before splitting in order to avoid duplicate tasks.
-                        current_task
-                            .children
-                            .extend(preserve_3d_map_stack.last().unwrap().values().cloned());
-                        // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
-                        for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
-                            let sc_index = StackingContextIndex(poly.anchor);
-                            let task_id = preserve_3d_map_stack.last().unwrap()[&sc_index];
-                            debug!("\t\tproduce {:?} -> {:?} for {:?}", sc_index, poly, task_id);
-                            let pp = &poly.points;
-                            let gpu_blocks = [
-                                [pp[0].x as f32, pp[0].y as f32, pp[0].z as f32, pp[1].x as f32].into(),
-                                [pp[1].y as f32, pp[1].z as f32, pp[2].x as f32, pp[2].y as f32].into(),
-                                [pp[2].z as f32, pp[3].x as f32, pp[3].y as f32, pp[3].z as f32].into(),
-                            ];
-                            let handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
-                            let item =
-                                AlphaRenderItem::SplitComposite(sc_index, task_id, handle, next_z);
-                            current_task.as_alpha_batch_mut().items.push(item);
-                        }
-                        preserve_3d_map_stack.pop();
-                        next_z += 1;
-                    }
-
-                    if stacking_context.is_pipeline_root &&
-                        output_pipelines.contains(&stacking_context.pipeline_id)
-                    {
-                        let mut prev_task = alpha_task_stack.pop().unwrap();
-                        let screen_origin = current_task.as_alpha_batch().screen_origin;
-                        let current_task_size = current_task.get_dynamic_size();
-                        let current_task_id = render_tasks.add(current_task);
-                        let item = AlphaRenderItem::HardwareComposite(
-                            stacking_context_index,
-                            current_task_id,
-                            HardwareCompositeOp::PremultipliedAlpha,
-                            screen_origin,
-                            next_z,
-                            current_task_size,
-                        );
-                        next_z += 1;
-                        prev_task.as_alpha_batch_mut().items.push(item);
-                        prev_task.children.push(current_task_id);
-                        current_task = prev_task;
-                    }
-                }
-                PrimitiveRunCmd::PrimitiveRun(ref run) => {
-                    let stacking_context_index = *sc_stack.last().unwrap();
-                    if !self.stacking_context_store[stacking_context_index.0].is_visible {
-                        continue;
-                    }
-
-                    debug!("\trun of {} items", run.count);
-
-                    let clip_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.clip_node_id()];
-                    if !clip_node.is_visible() {
-                        continue;
-                    }
-                    let scroll_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
-
-                    for i in 0 .. run.count {
-                        let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
-
-                        if self.prim_store.cpu_metadata[prim_index.0].screen_rect.is_some() {
-                            self.prim_store
-                                .add_render_tasks_for_prim(prim_index, &mut current_task);
-                            let item =
-                                AlphaRenderItem::Primitive(
-                                    clip_node.node_data_index,
-                                    scroll_node.node_data_index,
-                                    prim_index,
-                                    next_z
-                                );
-                            current_task.as_alpha_batch_mut().items.push(item);
-                            next_z += 1;
-                        }
-                    }
-                }
-            }
-        }
-
-        debug_assert!(alpha_task_stack.is_empty());
-        debug_assert!(preserve_3d_map_stack.is_empty());
-        render_tasks.add(current_task)
-    }
-
     pub fn build(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         frame_id: FrameId,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_ratio: f32,
         pan: LayerPoint,
-        output_pipelines: &FastHashSet<PipelineId>,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
+        scene_properties: &SceneProperties,
     ) -> Frame {
         profile_scope!("build");
 
         let mut profile_counters = FrameProfileCounters::new();
         profile_counters
             .total_primitives
             .set(self.prim_store.prim_count());
 
@@ -1906,65 +1655,66 @@ impl FrameBuilder {
         clip_scroll_tree.update_tree(
             &screen_rect,
             device_pixel_ratio,
             &mut self.clip_store,
             resource_cache,
             gpu_cache,
             pan,
             &mut node_data,
+            scene_properties,
         );
 
         self.update_scroll_bars(clip_scroll_tree, gpu_cache);
 
         let mut render_tasks = RenderTaskTree::new();
 
         self.build_layer_screen_rects_and_cull_layers(
-            &screen_rect,
             clip_scroll_tree,
             pipelines,
             resource_cache,
             gpu_cache,
             &mut render_tasks,
             &mut profile_counters,
             device_pixel_ratio,
+            scene_properties,
         );
 
-        let main_render_task_id = self.build_render_task(
-            clip_scroll_tree,
-            gpu_cache,
-            &mut render_tasks,
-            output_pipelines,
-            device_pixel_ratio,
-        );
+        let main_render_task_id = self.prim_store
+                                      .cpu_pictures[0]
+                                      .render_task_id
+                                      .expect("bug: no root render task!");
 
         let mut required_pass_count = 0;
         render_tasks.max_depth(main_render_task_id, 0, &mut required_pass_count);
 
         resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
 
         let mut deferred_resolves = vec![];
 
         let mut passes = Vec::new();
 
         // Do the allocations now, assigning each tile's tasks to a render
         // pass and target as required.
         for index in 0 .. required_pass_count {
-            passes.push(RenderPass::new(index == required_pass_count - 1));
+            passes.push(RenderPass::new(
+                index == required_pass_count - 1,
+                screen_rect.size,
+            ));
         }
 
         render_tasks.assign_to_passes(main_render_task_id, passes.len() - 1, &mut passes);
 
         for pass in &mut passes {
             let ctx = RenderTargetContext {
                 device_pixel_ratio,
-                stacking_context_store: &self.stacking_context_store,
                 prim_store: &self.prim_store,
                 resource_cache,
                 node_data: &node_data,
+                clip_scroll_tree,
             };
 
             pass.build(
                 &ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -4,17 +4,16 @@
 
 use api::{ClipId, DevicePoint, DeviceUintRect, DocumentId, Epoch};
 use api::{ExternalImageData, ExternalImageId};
 use api::{ImageFormat, PipelineId};
 use api::DebugCommand;
 use device::TextureFilter;
 use fxhash::FxHasher;
 use profiler::BackendProfileCounters;
-use renderer::BlendMode;
 use std::{usize, i32};
 use std::collections::{HashMap, HashSet};
 use std::f32;
 use std::hash::BuildHasherDefault;
 use std::path::PathBuf;
 use std::sync::Arc;
 use tiling;
 
@@ -182,29 +181,13 @@ pub enum ResultMsg {
         BackendProfileCounters,
     ),
     UpdateResources {
         updates: TextureUpdateList,
         cancel_rendering: bool,
     },
 }
 
-#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
-pub struct StackingContextIndex(pub usize);
-
 #[derive(Clone, Copy, Debug)]
 pub struct UvRect {
     pub uv0: DevicePoint,
     pub uv1: DevicePoint,
 }
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum HardwareCompositeOp {
-    PremultipliedAlpha,
-}
-
-impl HardwareCompositeOp {
-    pub fn to_blend_mode(&self) -> BlendMode {
-        match *self {
-            HardwareCompositeOp::PremultipliedAlpha => BlendMode::PremultipliedAlpha,
-        }
-    }
-}
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -71,16 +71,17 @@ mod glyph_cache;
 mod glyph_rasterizer;
 mod gpu_cache;
 mod gpu_types;
 mod internal_types;
 mod picture;
 mod prim_store;
 mod print_tree;
 mod profiler;
+mod query;
 mod record;
 mod render_backend;
 mod render_task;
 mod renderer;
 mod resource_cache;
 mod scene;
 mod spring;
 mod texture_allocator;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,214 +1,388 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderRadiusKind, ColorF, ClipAndScrollInfo};
-use api::{device_length, DeviceIntSize};
+use api::{BorderRadiusKind, ColorF, ClipAndScrollInfo, FilterOp, MixBlendMode};
+use api::{device_length, DeviceIntRect, DeviceIntSize, PipelineId};
 use api::{BoxShadowClipMode, LayerPoint, LayerRect, LayerSize, LayerVector2D, Shadow};
+use api::{ClipId, PremultipliedColorF};
 use box_shadow::BLUR_SAMPLE_SCALE;
 use frame_builder::PrimitiveContext;
 use gpu_cache::GpuDataRequest;
-use prim_store::{PrimitiveIndex, PrimitiveRun};
+use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
+use scene::{FilterOpHelpers, SceneProperties};
 use tiling::RenderTargetKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
    picture into its parent.
  * A configuration describing how to draw the primitives on
    this picture (e.g. in screen space or local space).
  */
 
+/// Specifies how this Picture should be composited
+/// onto the target it belongs to.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum PictureCompositeMode {
+    /// Apply CSS mix-blend-mode effect.
+    MixBlend(MixBlendMode),
+    /// Apply a CSS filter.
+    Filter(FilterOp),
+    /// Draw to intermediate surface, copy straight across. This
+    /// is used for CSS isolation, and plane splitting.
+    Blit,
+}
+
 #[derive(Debug)]
 pub enum PictureKind {
     TextShadow {
         offset: LayerVector2D,
         color: ColorF,
         blur_radius: f32,
+        content_rect: LayerRect,
     },
     BoxShadow {
         blur_radius: f32,
         color: ColorF,
         blur_regions: Vec<LayerRect>,
         clip_mode: BoxShadowClipMode,
         radii_kind: BorderRadiusKind,
+        content_rect: LayerRect,
+    },
+    Image {
+        // If the composite mode is a mix-blend-mode, contains the
+        // render task for the readback of the framebuffer that we
+        // sample from in the mix-blend-mode shader.
+        readback_render_task_id: Option<RenderTaskId>,
+        /// How this picture should be composited.
+        /// If None, don't composite - just draw directly on parent surface.
+        composite_mode: Option<PictureCompositeMode>,
+        // If true, this picture is part of a 3D context.
+        is_in_3d_context: bool,
+        // If requested as a frame output (for rendering
+        // pages to a texture), this is the pipeline this
+        // picture is the root of.
+        frame_output_pipeline_id: Option<PipelineId>,
+        // The original reference frame ID for this picture.
+        // It is only different if this is part of a 3D
+        // rendering context.
+        reference_frame_id: ClipId,
+        real_local_rect: LayerRect,
     },
 }
 
 #[derive(Debug)]
 pub struct PicturePrimitive {
-    pub prim_runs: Vec<PrimitiveRun>,
+    // If this picture is drawn to an intermediate surface,
+    // the associated render task.
     pub render_task_id: Option<RenderTaskId>,
+
+    // Details specific to this type of picture.
     pub kind: PictureKind,
-    pub content_rect: LayerRect,
+
+    // List of primitive runs that make up this picture.
+    pub runs: Vec<PrimitiveRun>,
+
+    // The pipeline that the primitives on this picture belong to.
+    pub pipeline_id: PipelineId,
+
+    // If true, apply visibility culling to primitives on this
+    // picture. Text shadows and box shadows set this to false,
+    // since we want to draw them unconditionally.
+    pub cull_children: bool,
 
     // TODO(gw): Add a mode that specifies if this
     //           picture should be rasterized in
     //           screen-space or local-space.
 }
 
 impl PicturePrimitive {
-    pub fn new_text_shadow(shadow: Shadow) -> Self {
+    pub fn new_text_shadow(shadow: Shadow, pipeline_id: PipelineId) -> Self {
         PicturePrimitive {
-            prim_runs: Vec::new(),
+            runs: Vec::new(),
             render_task_id: None,
-            content_rect: LayerRect::zero(),
             kind: PictureKind::TextShadow {
                 offset: shadow.offset,
                 color: shadow.color,
                 blur_radius: shadow.blur_radius,
+                content_rect: LayerRect::zero(),
             },
+            pipeline_id,
+            cull_children: false,
+        }
+    }
+
+    pub fn resolve_scene_properties(&mut self, properties: &SceneProperties) -> bool {
+        match self.kind {
+            PictureKind::Image { ref mut composite_mode, .. } => {
+                match composite_mode {
+                    &mut Some(PictureCompositeMode::Filter(ref mut filter)) => {
+                        match filter {
+                            &mut FilterOp::Opacity(ref binding, ref mut value) => {
+                                *value = properties.resolve_float(binding, *value);
+                            }
+                            _ => {}
+                        }
+
+                        filter.is_visible()
+                    }
+                    _ => true,
+                }
+            }
+            _ => true
         }
     }
 
     pub fn new_box_shadow(
         blur_radius: f32,
         color: ColorF,
         blur_regions: Vec<LayerRect>,
         clip_mode: BoxShadowClipMode,
         radii_kind: BorderRadiusKind,
+        pipeline_id: PipelineId,
     ) -> Self {
         PicturePrimitive {
-            prim_runs: Vec::new(),
+            runs: Vec::new(),
             render_task_id: None,
-            content_rect: LayerRect::zero(),
             kind: PictureKind::BoxShadow {
                 blur_radius,
                 color,
                 blur_regions,
                 clip_mode,
                 radii_kind,
+                content_rect: LayerRect::zero(),
             },
+            pipeline_id,
+            cull_children: false,
+        }
+    }
+
+    pub fn new_image(
+        composite_mode: Option<PictureCompositeMode>,
+        is_in_3d_context: bool,
+        pipeline_id: PipelineId,
+        reference_frame_id: ClipId,
+        frame_output_pipeline_id: Option<PipelineId>,
+    ) -> PicturePrimitive {
+        PicturePrimitive {
+            runs: Vec::new(),
+            render_task_id: None,
+            kind: PictureKind::Image {
+                readback_render_task_id: None,
+                composite_mode,
+                is_in_3d_context,
+                frame_output_pipeline_id,
+                reference_frame_id,
+                real_local_rect: LayerRect::zero(),
+            },
+            pipeline_id,
+            cull_children: true,
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
-        local_rect: &LayerRect,
         clip_and_scroll: ClipAndScrollInfo
     ) {
-        // TODO(gw): Accumulating the primitive local rect
-        //           into the content rect here is fine, for now.
-        //           The only way pictures are currently used,
-        //           all the items added to a picture are known
-        //           to be in the same local space. Once we start
-        //           using pictures for other uses, we will need
-        //           to consider the space of a primitive in order
-        //           to build a correct contect rect!
-        self.content_rect = self.content_rect.union(local_rect);
-
-        if let Some(ref mut run) = self.prim_runs.last_mut() {
+        if let Some(ref mut run) = self.runs.last_mut() {
             if run.clip_and_scroll == clip_and_scroll &&
                run.base_prim_index.0 + run.count == prim_index.0 {
                 run.count += 1;
                 return;
             }
         }
 
-        self.prim_runs.push(PrimitiveRun {
+        self.runs.push(PrimitiveRun {
             base_prim_index: prim_index,
             count: 1,
             clip_and_scroll,
         });
     }
 
-    pub fn build(&mut self) -> LayerRect {
+    pub fn update_local_rect(&mut self,
+        prim_local_rect: LayerRect,
+        prim_run_rect: PrimitiveRunLocalRect,
+    ) -> LayerRect {
+        let local_content_rect = prim_run_rect.local_rect_in_actual_parent_space;
+
         match self.kind {
-            PictureKind::TextShadow { offset, blur_radius, .. } => {
+            PictureKind::Image { composite_mode, ref mut real_local_rect, .. } => {
+                *real_local_rect = prim_run_rect.local_rect_in_original_parent_space;
+
+                match composite_mode {
+                    Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
+                        let inflate_size = blur_radius * BLUR_SAMPLE_SCALE;
+                        local_content_rect.inflate(inflate_size, inflate_size)
+                    }
+                    _ => {
+                        local_content_rect
+                    }
+                }
+            }
+            PictureKind::TextShadow { offset, blur_radius, ref mut content_rect, .. } => {
                 let blur_offset = blur_radius * BLUR_SAMPLE_SCALE;
 
-                self.content_rect = self.content_rect.inflate(
+                *content_rect = local_content_rect.inflate(
                     blur_offset,
                     blur_offset,
                 );
 
-                self.content_rect.translate(&offset)
+                content_rect.translate(&offset)
             }
-            PictureKind::BoxShadow { blur_radius, clip_mode, radii_kind, .. } => {
+            PictureKind::BoxShadow { blur_radius, clip_mode, radii_kind, ref mut content_rect, .. } => {
                 // We need to inflate the content rect if outset.
                 match clip_mode {
                     BoxShadowClipMode::Outset => {
                         let blur_offset = blur_radius * BLUR_SAMPLE_SCALE;
 
                         // If the radii are uniform, we can render just the top
                         // left corner and mirror it across the primitive. In
                         // this case, shift the content rect to leave room
                         // for the blur to take effect.
                         match radii_kind {
                             BorderRadiusKind::Uniform => {
                                 let origin = LayerPoint::new(
-                                    self.content_rect.origin.x - blur_offset,
-                                    self.content_rect.origin.y - blur_offset,
+                                    local_content_rect.origin.x - blur_offset,
+                                    local_content_rect.origin.y - blur_offset,
                                 );
                                 let size = LayerSize::new(
-                                    self.content_rect.size.width + blur_offset,
-                                    self.content_rect.size.height + blur_offset,
+                                    local_content_rect.size.width + blur_offset,
+                                    local_content_rect.size.height + blur_offset,
                                 );
-                                self.content_rect = LayerRect::new(origin, size);
+                                *content_rect = LayerRect::new(origin, size);
                             }
                             BorderRadiusKind::NonUniform => {
                                 // For a non-uniform radii, we need to expand
                                 // the content rect on all sides for the blur.
-                                self.content_rect = self.content_rect.inflate(
+                                *content_rect = local_content_rect.inflate(
                                     blur_offset,
                                     blur_offset,
                                 );
                             }
                         }
                     }
-                    BoxShadowClipMode::Inset => {}
+                    BoxShadowClipMode::Inset => {
+                        *content_rect = local_content_rect;
+                    }
                 }
 
-                self.content_rect
+                prim_local_rect
             }
         }
     }
 
     pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         render_tasks: &mut RenderTaskTree,
+        screen_rect: &DeviceIntRect,
+        child_tasks: Vec<RenderTaskId>,
+        parent_tasks: &mut Vec<RenderTaskId>,
     ) {
-        // This is a shadow element. Create a render task that will
-        // render the text run to a target, and then apply a gaussian
-        // blur to that text run in order to build the actual primitive
-        // which will be blitted to the framebuffer.
+        match self.kind {
+            PictureKind::Image {
+                ref mut readback_render_task_id,
+                composite_mode,
+                frame_output_pipeline_id,
+                ..
+            } => {
+                match composite_mode {
+                    Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
+                        let picture_task = RenderTask::new_dynamic_alpha_batch(
+                            screen_rect,
+                            prim_index,
+                            None,
+                            child_tasks,
+                        );
+
+                        let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
+                        let blur_std_deviation = blur_radius.0 as f32;
+                        let picture_task_id = render_tasks.add(picture_task);
+
+                        let blur_render_task = RenderTask::new_blur(
+                            blur_std_deviation,
+                            picture_task_id,
+                            render_tasks,
+                            RenderTargetKind::Color,
+                            &[],
+                            ClearMode::Transparent,
+                            PremultipliedColorF::TRANSPARENT,
+                        );
 
-        // TODO(gw): Rounding the content rect here to device pixels is not
-        // technically correct. Ideally we should ceil() here, and ensure that
-        // the extra part pixel in the case of fractional sizes is correctly
-        // handled. For now, just use rounding which passes the existing
-        // Gecko tests.
-        let cache_width =
-            (self.content_rect.size.width * prim_context.device_pixel_ratio).round() as i32;
-        let cache_height =
-            (self.content_rect.size.height * prim_context.device_pixel_ratio).round() as i32;
-        let cache_size = DeviceIntSize::new(cache_width, cache_height);
+                        let blur_render_task_id = render_tasks.add(blur_render_task);
+                        self.render_task_id = Some(blur_render_task_id);
+                    }
+                    Some(PictureCompositeMode::MixBlend(..)) => {
+                        let picture_task = RenderTask::new_dynamic_alpha_batch(
+                            screen_rect,
+                            prim_index,
+                            None,
+                            child_tasks,
+                        );
+
+                        let readback_task_id = render_tasks.add(RenderTask::new_readback(*screen_rect));
+
+                        *readback_render_task_id = Some(readback_task_id);
+                        parent_tasks.push(readback_task_id);
+
+                        self.render_task_id = Some(render_tasks.add(picture_task));
+                    }
+                    Some(PictureCompositeMode::Filter(..)) | Some(PictureCompositeMode::Blit) => {
+                        let picture_task = RenderTask::new_dynamic_alpha_batch(
+                            screen_rect,
+                            prim_index,
+                            frame_output_pipeline_id,
+                            child_tasks,
+                        );
 
-        match self.kind {
-            PictureKind::TextShadow { blur_radius, color, .. } => {
+                        self.render_task_id = Some(render_tasks.add(picture_task));
+                    }
+                    None => {
+                        parent_tasks.extend(child_tasks);
+                        self.render_task_id = None;
+                    }
+                }
+            }
+            PictureKind::TextShadow { blur_radius, color, content_rect, .. } => {
+                // This is a shadow element. Create a render task that will
+                // render the text run to a target, and then apply a gaussian
+                // blur to that text run in order to build the actual primitive
+                // which will be blitted to the framebuffer.
+
                 let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
 
+                // TODO(gw): Rounding the content rect here to device pixels is not
+                // technically correct. Ideally we should ceil() here, and ensure that
+                // the extra part pixel in the case of fractional sizes is correctly
+                // handled. For now, just use rounding which passes the existing
+                // Gecko tests.
+                let cache_width =
+                    (content_rect.size.width * prim_context.device_pixel_ratio).round() as i32;
+                let cache_height =
+                    (content_rect.size.height * prim_context.device_pixel_ratio).round() as i32;
+                let cache_size = DeviceIntSize::new(cache_width, cache_height);
+
                 // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
                 // "the image that would be generated by applying to the shadow a
                 // Gaussian blur with a standard deviation equal to half the blur radius."
                 let blur_std_deviation = blur_radius.0 as f32 * 0.5;
 
                 let picture_task = RenderTask::new_picture(
                     cache_size,
                     prim_index,
                     RenderTargetKind::Color,
-                    self.content_rect.origin,
+                    content_rect.origin,
                     color.premultiplied(),
                     ClearMode::Transparent,
                 );
 
                 let picture_task_id = render_tasks.add(picture_task);
 
                 let render_task = RenderTask::new_blur(
                     blur_std_deviation,
@@ -217,19 +391,30 @@ impl PicturePrimitive {
                     RenderTargetKind::Color,
                     &[],
                     ClearMode::Transparent,
                     color.premultiplied(),
                 );
 
                 self.render_task_id = Some(render_tasks.add(render_task));
             }
-            PictureKind::BoxShadow { blur_radius, clip_mode, ref blur_regions, color, .. } => {
+            PictureKind::BoxShadow { blur_radius, clip_mode, ref blur_regions, color, content_rect, .. } => {
                 let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
 
+                // TODO(gw): Rounding the content rect here to device pixels is not
+                // technically correct. Ideally we should ceil() here, and ensure that
+                // the extra part pixel in the case of fractional sizes is correctly
+                // handled. For now, just use rounding which passes the existing
+                // Gecko tests.
+                let cache_width =
+                    (content_rect.size.width * prim_context.device_pixel_ratio).round() as i32;
+                let cache_height =
+                    (content_rect.size.height * prim_context.device_pixel_ratio).round() as i32;
+                let cache_size = DeviceIntSize::new(cache_width, cache_height);
+
                 // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
                 // "the image that would be generated by applying to the shadow a
                 // Gaussian blur with a standard deviation equal to half the blur radius."
                 let blur_std_deviation = blur_radius.0 as f32 * 0.5;
 
                 let blur_clear_mode = match clip_mode {
                     BoxShadowClipMode::Outset => {
                         ClearMode::One
@@ -238,17 +423,17 @@ impl PicturePrimitive {
                         ClearMode::Zero
                     }
                 };
 
                 let picture_task = RenderTask::new_picture(
                     cache_size,
                     prim_index,
                     RenderTargetKind::Alpha,
-                    self.content_rect.origin,
+                    content_rect.origin,
                     color.premultiplied(),
                     ClearMode::Zero,
                 );
 
                 let picture_task_id = render_tasks.add(picture_task);
 
                 let render_task = RenderTask::new_blur(
                     blur_std_deviation,
@@ -258,23 +443,28 @@ impl PicturePrimitive {
                     blur_regions,
                     blur_clear_mode,
                     color.premultiplied(),
                 );
 
                 self.render_task_id = Some(render_tasks.add(render_task));
             }
         }
+
+        if let Some(render_task_id) = self.render_task_id {
+            parent_tasks.push(render_task_id);
+        }
     }
 
     pub fn write_gpu_blocks(&self, mut _request: GpuDataRequest) {
         // TODO(gw): We'll need to write the GPU blocks
         //           here specific to a brush primitive
         //           once we start drawing pictures as brushes!
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             PictureKind::TextShadow { .. } => RenderTargetKind::Color,
             PictureKind::BoxShadow { .. } => RenderTargetKind::Alpha,
+            PictureKind::Image { .. } => RenderTargetKind::Color,
         }
     }
 }
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -35,49 +35,46 @@ fn dwrite_texture_type(render_mode: Font
         FontRenderMode::Alpha | FontRenderMode::Subpixel => dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1,
     }
 }
 
 fn dwrite_measure_mode(
     render_mode: FontRenderMode,
     options: Option<FontInstancePlatformOptions>,
 ) -> dwrote::DWRITE_MEASURING_MODE {
-    if let Some(FontInstancePlatformOptions {
-        force_gdi_rendering: true,
-        ..
-    }) = options
-    {
+    let FontInstancePlatformOptions { force_gdi_rendering, use_embedded_bitmap, .. } =
+        options.unwrap_or_default();
+    if force_gdi_rendering || use_embedded_bitmap {
         return dwrote::DWRITE_MEASURING_MODE_GDI_CLASSIC;
     }
 
     match render_mode {
         FontRenderMode::Mono | FontRenderMode::Bitmap => dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
         FontRenderMode::Alpha | FontRenderMode::Subpixel => dwrote::DWRITE_MEASURING_MODE_NATURAL,
     }
 }
 
 fn dwrite_render_mode(
     font_face: &dwrote::FontFace,
     render_mode: FontRenderMode,
     em_size: f32,
     measure_mode: dwrote::DWRITE_MEASURING_MODE,
     options: Option<FontInstancePlatformOptions>,
 ) -> dwrote::DWRITE_RENDERING_MODE {
-    if let Some(FontInstancePlatformOptions {
-        force_gdi_rendering: true,
-        ..
-    }) = options
-    {
-        return dwrote::DWRITE_RENDERING_MODE_GDI_CLASSIC;
-    }
+    let FontInstancePlatformOptions { force_gdi_rendering, use_embedded_bitmap, .. } =
+        options.unwrap_or_default();
 
     let dwrite_render_mode = match render_mode {
         FontRenderMode::Mono | FontRenderMode::Bitmap => dwrote::DWRITE_RENDERING_MODE_ALIASED,
         FontRenderMode::Alpha | FontRenderMode::Subpixel => {
-            font_face.get_recommended_rendering_mode_default_params(em_size, 1.0, measure_mode)
+            if force_gdi_rendering || use_embedded_bitmap {
+                dwrote::DWRITE_RENDERING_MODE_GDI_CLASSIC
+            } else {
+                font_face.get_recommended_rendering_mode_default_params(em_size, 1.0, measure_mode)
+            }
         }
     };
 
     if dwrite_render_mode == dwrote::DWRITE_RENDERING_MODE_OUTLINE {
         // Outline mode is not supported
         return dwrote::DWRITE_RENDERING_MODE_CLEARTYPE_NATURAL_SYMMETRIC;
     }
 
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -2,45 +2,43 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect};
 use api::{DevicePoint, ExtendMode, GlyphInstance, GlyphKey};
 use api::{GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerRect};
 use api::{ClipMode, LayerSize, LayerVector2D, LineOrientation, LineStyle};
 use api::{ClipAndScrollInfo, EdgeAaSegmentMask, PremultipliedColorF, TileOffset};
-use api::{YuvColorSpace, YuvFormat};
+use api::{ClipId, LayerTransform, PipelineId, YuvColorSpace, YuvFormat};
 use border::BorderCornerInstance;
-use clip::{ClipSourcesHandle, ClipStore, Geometry};
+use clip_scroll_tree::ClipScrollTree;
+use clip::{ClipSourcesHandle, ClipStore};
 use frame_builder::PrimitiveContext;
 use glyph_rasterizer::FontInstance;
+use internal_types::FastHashMap;
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
-use picture::PicturePrimitive;
-use render_task::{ClipWorkItem, ClipChainNode, RenderTask, RenderTaskId, RenderTaskTree};
+use picture::{PictureKind, PicturePrimitive};
+use profiler::FrameProfileCounters;
+use render_task::{ClipWorkItem, ClipChainNode};
+use render_task::{RenderTask, RenderTaskId, RenderTaskTree};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use resource_cache::{ImageProperties, ResourceCache};
+use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize};
 use std::rc::Rc;
 use util::{pack_as_float, recycle_vec, MatrixHelpers, TransformedRect, TransformedRectKind};
 
-#[derive(Clone, Debug)]
+#[derive(Debug)]
 pub struct PrimitiveRun {
     pub base_prim_index: PrimitiveIndex,
     pub count: usize,
     pub clip_and_scroll: ClipAndScrollInfo,
 }
 
-#[derive(Debug)]
-pub struct PrimitiveRunResult {
-    pub local_rect: LayerRect,
-    pub device_rect: DeviceIntRect,
-    pub visible_primitives: usize,
-}
-
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
     pub is_opaque: bool,
 }
 
 impl PrimitiveOpacity {
     pub fn opaque() -> PrimitiveOpacity {
         PrimitiveOpacity { is_opaque: true }
@@ -56,16 +54,37 @@ impl PrimitiveOpacity {
         }
     }
 
     pub fn accumulate(&mut self, alpha: f32) {
         self.is_opaque = self.is_opaque && alpha == 1.0;
     }
 }
 
+// Represents the local space rect of a list of
+// primitive runs. For most primitive runs, the
+// primitive runs are attached to the parent they
+// are declared in. However, when a primitive run
+// is part of a 3d rendering context, it may get
+// hoisted to a higher level in the picture tree.
+// When this happens, we need to also calculate the
+// local space rects in the original space. This
+// allows constructing the true world space polygons
+// for the primitive, to enable the plane splitting
+// logic to work correctly.
+// TODO(gw) In the future, we can probably simplify
+//          this - perhaps calculate the world space
+//          polygons directly and store internally
+//          in the picture structure.
+#[derive(Debug)]
+pub struct PrimitiveRunLocalRect {
+    pub local_rect_in_actual_parent_space: LayerRect,
+    pub local_rect_in_original_parent_space: LayerRect,
+}
+
 /// Stores two coordinates in texel space. The coordinates
 /// are stored in texel coordinates because the texture atlas
 /// may grow. Storing them as texel coords and normalizing
 /// the UVs in the vertex shader means nothing needs to be
 /// updated on the CPU when the texture size changes.
 #[derive(Copy, Clone, Debug)]
 pub struct TexelRect {
     pub uv0: DevicePoint,
@@ -1057,64 +1076,38 @@ impl PrimitiveStore {
     pub fn get_metadata(&self, index: PrimitiveIndex) -> &PrimitiveMetadata {
         &self.cpu_metadata[index.0]
     }
 
     pub fn prim_count(&self) -> usize {
         self.cpu_metadata.len()
     }
 
-    /// Add any task dependencies for this primitive to the provided task.
-    pub fn add_render_tasks_for_prim(&self, prim_index: PrimitiveIndex, task: &mut RenderTask) {
-        // Add any dynamic render tasks needed to render this primitive
-        let metadata = &self.cpu_metadata[prim_index.0];
-
-        let render_task_id = match metadata.prim_kind {
-            PrimitiveKind::Picture => {
-                let picture = &self.cpu_pictures[metadata.cpu_prim_index.0];
-                picture.render_task_id
-            }
-            PrimitiveKind::Rectangle |
-            PrimitiveKind::TextRun |
-            PrimitiveKind::Image |
-            PrimitiveKind::AlignedGradient |
-            PrimitiveKind::YuvImage |
-            PrimitiveKind::Border |
-            PrimitiveKind::AngleGradient |
-            PrimitiveKind::RadialGradient |
-            PrimitiveKind::Line |
-            PrimitiveKind::Brush => None,
-        };
-
-        if let Some(render_task_id) = render_task_id {
-            task.children.push(render_task_id);
-        }
-
-        if let Some(clip_task_id) = metadata.clip_task_id {
-            task.children.push(clip_task_id);
-        }
-    }
-
     fn prepare_prim_for_render_inner(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
+        child_tasks: Vec<RenderTaskId>,
+        parent_tasks: &mut Vec<RenderTaskId>,
     ) {
         let metadata = &mut self.cpu_metadata[prim_index.0];
         match metadata.prim_kind {
             PrimitiveKind::Rectangle | PrimitiveKind::Border | PrimitiveKind::Line => {}
             PrimitiveKind::Picture => {
                 self.cpu_pictures[metadata.cpu_prim_index.0]
                     .prepare_for_render(
                         prim_index,
                         prim_context,
-                        render_tasks
+                        render_tasks,
+                        metadata.screen_rect.as_ref().expect("bug: trying to draw an off-screen picture!?"),
+                        child_tasks,
+                        parent_tasks,
                     );
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 text.prepare_for_render(
                     resource_cache,
                     prim_context.device_pixel_ratio,
                     prim_context.display_list,
@@ -1221,18 +1214,20 @@ impl PrimitiveStore {
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         prim_screen_rect: DeviceIntRect,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
+        tasks: &mut Vec<RenderTaskId>,
     ) -> bool {
         let metadata = &mut self.cpu_metadata[prim_index.0];
+        metadata.clip_task_id = None;
         let transform = &prim_context.scroll_node.world_content_transform;
 
         clip_store.get_mut(&metadata.clip_sources).update(
             transform,
             gpu_cache,
             resource_cache,
             prim_context.device_pixel_ratio,
         );
@@ -1278,150 +1273,284 @@ impl PrimitiveStore {
                 clip_store,
                 is_axis_aligned,
                 prim_context.scroll_node.coordinate_system_id,
             )
         } else {
             None
         };
 
-        metadata.clip_task_id = clip_task.map(|clip_task| render_tasks.add(clip_task));
+        if let Some(clip_task) = clip_task {
+            let clip_task_id = render_tasks.add(clip_task);
+
+            metadata.clip_task_id = Some(clip_task_id);
+            tasks.push(clip_task_id);
+        }
+
         true
     }
 
     pub fn prepare_prim_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
-    ) -> Option<Geometry> {
-        let (geometry, dependent_primitives) = {
+        clip_scroll_tree: &ClipScrollTree,
+        pipelines: &FastHashMap<PipelineId, ScenePipeline>,
+        perform_culling: bool,
+        parent_tasks: &mut Vec<RenderTaskId>,
+        scene_properties: &SceneProperties,
+        profile_counters: &mut FrameProfileCounters,
+    ) -> Option<LayerRect> {
+        // Reset the visibility of this primitive.
+        // Do some basic checks first, that can early out
+        // without even knowing the local rect.
+        let (cpu_prim_index, dependencies, cull_children) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             metadata.screen_rect = None;
 
+            if perform_culling &&
+               !metadata.is_backface_visible &&
+               prim_context.scroll_node.world_content_transform.is_backface_visible() {
+                return None;
+            }
+
+            let (dependencies, cull_children) = match metadata.prim_kind {
+                PrimitiveKind::Picture => {
+                    let pic = &mut self.cpu_pictures[metadata.cpu_prim_index.0];
+
+                    if !pic.resolve_scene_properties(scene_properties) {
+                        return None;
+                    }
+
+                    let rfid = match pic.kind {
+                        PictureKind::Image { reference_frame_id, .. } => Some(reference_frame_id),
+                        _ => None,
+                    };
+                    (Some((pic.pipeline_id, mem::replace(&mut pic.runs, Vec::new()), rfid)), pic.cull_children)
+                }
+                _ => {
+                    (None, true)
+                }
+            };
+
+            (metadata.cpu_prim_index, dependencies, cull_children)
+        };
+
+        // If we have dependencies, we need to prepare them first, in order
+        // to know the actual rect of this primitive.
+        // For example, scrolling may affect the location of an item in
+        // local space, which may force us to render this item on a larger
+        // picture target, if being composited.
+        let mut child_tasks = Vec::new();
+        if let Some((pipeline_id, dependencies, rfid)) = dependencies {
+            let result = self.prepare_prim_runs(
+                &dependencies,
+                pipeline_id,
+                gpu_cache,
+                resource_cache,
+                render_tasks,
+                clip_store,
+                clip_scroll_tree,
+                pipelines,
+                prim_context,
+                cull_children,
+                &mut child_tasks,
+                profile_counters,
+                rfid,
+                scene_properties,
+            );
+
+            let metadata = &mut self.cpu_metadata[prim_index.0];
+
+            // Restore the dependencies (borrow check dance)
+            let pic = &mut self.cpu_pictures[cpu_prim_index.0];
+            pic.runs = dependencies;
+
+            metadata.local_rect = pic.update_local_rect(
+                metadata.local_rect,
+                result,
+            );
+        }
+
+        let (local_rect, device_rect) = {
+            let metadata = &mut self.cpu_metadata[prim_index.0];
             if metadata.local_rect.size.width <= 0.0 ||
                metadata.local_rect.size.height <= 0.0 {
                 warn!("invalid primitive rect {:?}", metadata.local_rect);
                 return None;
             }
 
-            if !metadata.is_backface_visible &&
-               prim_context.scroll_node.world_content_transform.is_backface_visible() {
-                return None;
-            }
-
             let local_rect = metadata
                 .local_rect
                 .intersection(&metadata.local_clip_rect);
 
             let local_rect = match local_rect {
                 Some(local_rect) => local_rect,
-                None => return None,
+                None if perform_culling => return None,
+                None => LayerRect::zero(),
             };
 
             let xf_rect = TransformedRect::new(
                 &local_rect,
                 &prim_context.scroll_node.world_content_transform,
                 prim_context.device_pixel_ratio
             );
 
             let clip_bounds = &prim_context.clip_node.combined_clip_outer_bounds;
             metadata.screen_rect = xf_rect.bounding_rect
                                           .intersection(clip_bounds);
 
-            let geometry = match metadata.screen_rect {
-                Some(device_rect) => Geometry {
-                    local_rect,
-                    device_rect,
-                },
-                None => return None,
+            let device_rect = match metadata.screen_rect {
+                Some(device_rect) => device_rect,
+                None => {
+                    if perform_culling {
+                        return None
+                    } else {
+                        DeviceIntRect::zero()
+                    }
+                }
             };
 
-            let dependencies = match metadata.prim_kind {
-                PrimitiveKind::Picture =>
-                    self.cpu_pictures[metadata.cpu_prim_index.0].prim_runs.clone(),
-                _ => Vec::new(),
-            };
-            (geometry, dependencies)
+            (local_rect, device_rect)
         };
 
-        // Recurse into any sub primitives and prepare them for rendering first.
-        // TODO(gw): This code is a bit hacky to work around the borrow checker.
-        //           Specifically, the clone() below on the primitive list for
-        //           text shadow primitives. Consider restructuring this code to
-        //           avoid borrow checker issues.
-        for run in dependent_primitives {
-            for i in 0 .. run.count {
-                let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
-
-                self.prepare_prim_for_render_inner(
-                    sub_prim_index,
-                    prim_context,
-                    resource_cache,
-                    gpu_cache,
-                    render_tasks,
-                );
-            }
-        }
-
         if !self.update_clip_task(
             prim_index,
             prim_context,
-            geometry.device_rect,
+            device_rect,
             resource_cache,
             gpu_cache,
             render_tasks,
             clip_store,
-        ) {
+            parent_tasks,
+        ) && perform_culling {
             return None;
         }
 
         self.prepare_prim_for_render_inner(
             prim_index,
             prim_context,
             resource_cache,
             gpu_cache,
             render_tasks,
+            child_tasks,
+            parent_tasks,
         );
 
-        Some(geometry)
+        Some(local_rect)
     }
 
-    pub fn prepare_prim_run(
+    // TODO(gw): Make this simpler / more efficient by tidying
+    //           up the logic that early outs from prepare_prim_for_render.
+    pub fn reset_prim_visibility(&mut self) {
+        for md in &mut self.cpu_metadata {
+            md.screen_rect = None;
+        }
+    }
+
+    pub fn prepare_prim_runs(
         &mut self,
-        run: &PrimitiveRun,
-        prim_context: &PrimitiveContext,
+        runs: &[PrimitiveRun],
+        pipeline_id: PipelineId,
         gpu_cache: &mut GpuCache,
         resource_cache: &mut ResourceCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
-    ) -> PrimitiveRunResult {
-        let mut result = PrimitiveRunResult {
-            local_rect: LayerRect::zero(),
-            device_rect: DeviceIntRect::zero(),
-            visible_primitives: 0,
+        clip_scroll_tree: &ClipScrollTree,
+        pipelines: &FastHashMap<PipelineId, ScenePipeline>,
+        parent_prim_context: &PrimitiveContext,
+        perform_culling: bool,
+        parent_tasks: &mut Vec<RenderTaskId>,
+        profile_counters: &mut FrameProfileCounters,
+        original_reference_frame_id: Option<ClipId>,
+        scene_properties: &SceneProperties,
+    ) -> PrimitiveRunLocalRect {
+        let mut result = PrimitiveRunLocalRect {
+            local_rect_in_actual_parent_space: LayerRect::zero(),
+            local_rect_in_original_parent_space: LayerRect::zero(),
         };
 
-        for i in 0 .. run.count {
-            let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
+        for run in runs {
+            // TODO(gw): Perhaps we can restructure this to not need to create
+            //           a new primitive context for every run (if the hash
+            //           lookups ever show up in a profile).
+            let scroll_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
+            let clip_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.clip_node_id()];
+
+            if perform_culling && !clip_node.is_visible() {
+                debug!("{:?} of clipped out {:?}", run.base_prim_index, pipeline_id);
+                continue;
+            }
+
+            let parent_relative_transform = parent_prim_context
+                .scroll_node
+                .world_content_transform
+                .inverse()
+                .map(|inv_parent| {
+                    inv_parent.pre_mul(&scroll_node.world_content_transform)
+                });
+
+            let original_relative_transform = original_reference_frame_id
+                .and_then(|original_reference_frame_id| {
+                    let parent = clip_scroll_tree
+                        .nodes[&original_reference_frame_id]
+                        .world_content_transform;
+                    parent.inverse()
+                        .map(|inv_parent| {
+                            inv_parent.pre_mul(&scroll_node.world_content_transform)
+                        })
+                });
+
+            let display_list = &pipelines
+                .get(&pipeline_id)
+                .expect("No display list?")
+                .display_list;
 
-            if let Some(prim_geom) = self.prepare_prim_for_render(
-                prim_index,
-                prim_context,
-                resource_cache,
-                gpu_cache,
-                render_tasks,
-                clip_store,
-            ) {
-                result.local_rect = result.local_rect.union(&prim_geom.local_rect);
-                result.device_rect = result.device_rect.union(&prim_geom.device_rect);
-                result.visible_primitives += 1;
+            let child_prim_context = PrimitiveContext::new(
+                parent_prim_context.device_pixel_ratio,
+                display_list,
+                clip_node,
+                scroll_node,
+            );
+
+            for i in 0 .. run.count {
+                let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
+
+                if let Some(prim_local_rect) = self.prepare_prim_for_render(
+                    prim_index,
+                    &child_prim_context,
+                    resource_cache,
+                    gpu_cache,
+                    render_tasks,
+                    clip_store,
+                    clip_scroll_tree,
+                    pipelines,
+                    perform_culling,
+                    parent_tasks,
+                    scene_properties,
+                    profile_counters,
+                ) {
+                    profile_counters.visible_primitives.inc();
+
+                    if let Some(ref matrix) = original_relative_transform {
+                        let bounds = get_local_bounding_rect(&prim_local_rect, matrix);
+                        result.local_rect_in_original_parent_space =
+                            result.local_rect_in_original_parent_space.union(&bounds);
+                    }
+
+                    if let Some(ref matrix) = parent_relative_transform {
+                        let bounds = get_local_bounding_rect(&prim_local_rect, matrix);
+                        result.local_rect_in_actual_parent_space =
+                            result.local_rect_in_actual_parent_space.union(&bounds);
+                    }
+                }
             }
         }
 
         result
     }
 }
 
 //Test for one clip region contains another
@@ -1443,8 +1572,34 @@ impl InsideTest<ComplexClipRegion> for C
             clip.radii.top_right.width >= self.radii.top_right.width - delta_right &&
             clip.radii.top_right.height >= self.radii.top_right.height - delta_top &&
             clip.radii.bottom_left.width >= self.radii.bottom_left.width - delta_left &&
             clip.radii.bottom_left.height >= self.radii.bottom_left.height - delta_bottom &&
             clip.radii.bottom_right.width >= self.radii.bottom_right.width - delta_right &&
             clip.radii.bottom_right.height >= self.radii.bottom_right.height - delta_bottom
     }
 }
+
+fn get_local_bounding_rect(
+    local_rect: &LayerRect,
+    matrix: &LayerTransform
+) -> LayerRect {
+    let vertices = [
+        matrix.transform_point3d(&local_rect.origin.to_3d()),
+        matrix.transform_point3d(&local_rect.bottom_left().to_3d()),
+        matrix.transform_point3d(&local_rect.bottom_right().to_3d()),
+        matrix.transform_point3d(&local_rect.top_right().to_3d()),
+    ];
+
+    let mut x0 = vertices[0].x;
+    let mut y0 = vertices[0].y;
+    let mut x1 = vertices[0].x;
+    let mut y1 = vertices[0].y;
+
+    for v in &vertices[1..] {
+        x0 = x0.min(v.x);
+        y0 = y0.min(v.y);
+        x1 = x1.max(v.x);
+        y1 = y1.max(v.y);
+    }
+
+    LayerRect::new(LayerPoint::new(x0, y0), LayerSize::new(x1 - x0, y1 - y0))
+}
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -1,19 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorF, ColorU};
 use debug_render::DebugRenderer;
-use device::{Device, GpuMarker, GpuSampler, GpuTimer, NamedTag};
 use euclid::{Point2D, Rect, Size2D, vec2};
+use query::{GpuSampler, GpuTimer, NamedTag};
 use std::collections::vec_deque::VecDeque;
-use std::f32;
-use std::mem;
+use std::{f32, mem};
 use time::precise_time_ns;
 
 const GRAPH_WIDTH: f32 = 1024.0;
 const GRAPH_HEIGHT: f32 = 320.0;
 const GRAPH_PADDING: f32 = 8.0;
 const GRAPH_FRAME_HEIGHT: f32 = 16.0;
 const PROFILE_PADDING: f32 = 10.0;
 
@@ -789,34 +788,32 @@ impl Profiler {
             self.y_left = new_y;
         } else {
             self.y_right = new_y;
         }
     }
 
     pub fn draw_profile(
         &mut self,
-        device: &mut Device,
         frame_profile: &FrameProfileCounters,
         backend_profile: &BackendProfileCounters,
         renderer_profile: &RendererProfileCounters,
         renderer_timers: &mut RendererProfileTimers,
         gpu_samplers: &[GpuSampler<GpuProfileTag>],
         screen_fraction: f32,
         debug_renderer: &mut DebugRenderer,
     ) {
-        let _gm = GpuMarker::new(device.rc_gl(), "profile");
         self.x_left = 20.0;
         self.y_left = 40.0;
         self.x_right = 400.0;
         self.y_right = 40.0;
 
         let mut gpu_time = 0;
-        let gpu_samples = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
-        for sample in &gpu_samples {
+        let gpu_timers = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
+        for sample in &gpu_timers {
             gpu_time += sample.time_ns;
         }
         renderer_timers.gpu_time.set(gpu_time);
 
         self.draw_counters(&[&renderer_profile.frame_time], debug_renderer, true);
 
         self.draw_counters(
             &[
@@ -877,41 +874,43 @@ impl Profiler {
                 &backend_profile.total_time,
                 &renderer_timers.cpu_time,
                 &renderer_timers.gpu_time,
             ],
             debug_renderer,
             false,
         );
 
-        let mut samplers = Vec::<FloatProfileCounter>::new();
-        // Gathering unique GPU samplers. This has O(N^2) complexity,
-        // but we only have a few samplers per target.
-        for sampler in gpu_samplers {
-            let value = sampler.count as f32 * screen_fraction;
-            match samplers.iter().position(|s| {
-                s.description as *const _ == sampler.tag.label as *const _
-            }) {
-                Some(pos) => samplers[pos].value += value,
-                None => samplers.push(FloatProfileCounter {
-                    description: sampler.tag.label,
-                    value,
-                }),
+        if !gpu_samplers.is_empty() {
+            let mut samplers = Vec::<FloatProfileCounter>::new();
+            // Gathering unique GPU samplers. This has O(N^2) complexity,
+            // but we only have a few samplers per target.
+            for sampler in gpu_samplers {
+                let value = sampler.count as f32 * screen_fraction;
+                match samplers.iter().position(|s| {
+                    s.description as *const _ == sampler.tag.label as *const _
+                }) {
+                    Some(pos) => samplers[pos].value += value,
+                    None => samplers.push(FloatProfileCounter {
+                        description: sampler.tag.label,
+                        value,
+                    }),
+                }
             }
+            self.draw_counters(&samplers, debug_renderer, false);
         }
-        self.draw_counters(&samplers, debug_renderer, false);
 
         self.backend_time
             .push(backend_profile.total_time.nanoseconds);
         self.compositor_time
             .push(renderer_timers.cpu_time.nanoseconds);
         self.ipc_time
             .push(backend_profile.ipc.total_time.nanoseconds);
         self.gpu_time.push(gpu_time);
-        self.gpu_frames.push(gpu_time, gpu_samples);
+        self.gpu_frames.push(gpu_time, gpu_timers);
 
 
         let rect =
             self.backend_time
                 .draw_graph(self.x_left, self.y_left, "CPU (backend)", debug_renderer);
         self.y_left += rect.size.height + PROFILE_PADDING;
         let rect = self.compositor_time.draw_graph(
             self.x_left,
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/query.rs
@@ -0,0 +1,319 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use gleam::gl;
+use std::mem;
+use std::rc::Rc;
+
+use device::FrameId;
+
+
+/// A profiling tag that can describe itself with a human-readable label.
+///
+/// The label is used as the text of the GPU debug marker group that is
+/// pushed when a timer is started with the tag.
+pub trait NamedTag {
+    /// Returns the label associated with this tag.
+    fn get_label(&self) -> &str;
+}
+
+/// A GPU time measurement recorded against a tag.
+#[derive(Debug, Clone)]
+pub struct GpuTimer<T> {
+    /// Tag supplied when the timer was started.
+    pub tag: T,
+    /// Result of the `TIME_ELAPSED` query; 0 until the result is read back.
+    pub time_ns: u64,
+}
+
+/// A GPU sample count recorded against a tag.
+#[derive(Debug, Clone)]
+pub struct GpuSampler<T> {
+    /// Tag supplied when the sampler was started.
+    pub tag: T,
+    /// Result of the `SAMPLES_PASSED` query; 0 until the result is read back.
+    pub count: u64,
+}
+
+/// A pool of GL query objects together with the values recorded against them.
+///
+/// The value at `data[i]` was recorded using the query object `set[i]`.
+pub struct QuerySet<T> {
+    /// GL query object names available to this set; empty while disabled.
+    set: Vec<gl::GLuint>,
+    /// Values added since the last reset, in the order they were added.
+    data: Vec<T>,
+    /// Name of the query currently in flight, or 0 when there is none.
+    pending: gl::GLuint,
+}
+
+impl<T> QuerySet<T> {
+    /// Creates a set that owns no query objects and holds no values.
+    fn new() -> Self {
+        QuerySet {
+            pending: 0,
+            data: Vec::new(),
+            set: Vec::new(),
+        }
+    }
+
+    /// Forgets the recorded values and any in-flight query while keeping
+    /// the query objects themselves.
+    fn reset(&mut self) {
+        self.pending = 0;
+        self.data.clear();
+    }
+
+    /// Records `value` against the next unused query object and marks that
+    /// query as pending.
+    ///
+    /// Returns `None`, leaving the set untouched, when no unused query
+    /// object remains (which is always the case while the set is empty).
+    ///
+    /// # Panics
+    ///
+    /// Panics if a previously added query is still pending.
+    fn add(&mut self, value: T) -> Option<gl::GLuint> {
+        assert_eq!(self.pending, 0);
+        // Values and query objects are paired by index, so the number of
+        // values recorded so far is the index of the next free query.
+        let next_query = self.set.get(self.data.len()).cloned();
+        if let Some(query_id) = next_query {
+            self.data.push(value);
+            self.pending = query_id;
+        }
+        next_query
+    }
+
+    /// Moves the recorded values out of the set, calling `fun` on each one
+    /// together with the query object it was recorded against.
+    fn take<F: Fn(&mut T, gl::GLuint)>(&mut self, fun: F) -> Vec<T> {
+        let mut taken = mem::replace(&mut self.data, Vec::new());
+        let queries = self.set.iter().cloned();
+        for (entry, query_id) in taken.iter_mut().zip(queries) {
+            fun(entry, query_id);
+        }
+        taken
+    }
+}
+
+/// GPU timer and sampler queries recorded for a single frame.
+pub struct GpuFrameProfile<T> {
+    /// GL interface used to create, run and delete the queries.
+    gl: Rc<gl::Gl>,
+    /// `TIME_ELAPSED` queries and the timers recorded against them.
+    timers: QuerySet<GpuTimer<T>>,
+    /// `SAMPLES_PASSED` queries and the samplers recorded against them.
+    samplers: QuerySet<GpuSampler<T>>,
+    /// Id passed to the most recent `begin_frame` call.
+    frame_id: FrameId,
+    /// True between `begin_frame` and `end_frame`.
+    inside_frame: bool,
+}
+
+impl<T> GpuFrameProfile<T> {
+    /// Creates a frame profile that owns no query objects, so timers and
+    /// samplers both start out disabled.
+    fn new(gl: Rc<gl::Gl>) -> Self {
+        GpuFrameProfile {
+            gl,
+            timers: QuerySet::new(),
+            samplers: QuerySet::new(),
+            frame_id: FrameId::new(0),
+            inside_frame: false,
+        }
+    }
+
+    /// Allocates `count` GL query objects for timers.
+    ///
+    /// Query objects left over from an earlier call are deleted first;
+    /// overwriting the list without deleting them would leak the old
+    /// objects every time timers are enabled while already enabled.
+    fn enable_timers(&mut self, count: i32) {
+        self.disable_timers();
+        self.timers.set = self.gl.gen_queries(count);
+    }
+
+    /// Deletes the timer query objects, if any, which disables timers.
+    fn disable_timers(&mut self) {
+        if !self.timers.set.is_empty() {
+            self.gl.delete_queries(&self.timers.set);
+        }
+        self.timers.set = Vec::new();
+    }
+
+    /// Allocates `count` GL query objects for samplers.
+    ///
+    /// Query objects left over from an earlier call are deleted first;
+    /// overwriting the list without deleting them would leak the old
+    /// objects every time samplers are enabled while already enabled.
+    fn enable_samplers(&mut self, count: i32) {
+        self.disable_samplers();
+        self.samplers.set = self.gl.gen_queries(count);
+    }
+
+    /// Deletes the sampler query objects, if any, which disables samplers.
+    fn disable_samplers(&mut self) {
+        if !self.samplers.set.is_empty() {
+            self.gl.delete_queries(&self.samplers.set);
+        }
+        self.samplers.set = Vec::new();
+    }
+
+    /// Starts recording frame `frame_id`, discarding the values recorded
+    /// during the previous use of this profile.
+    fn begin_frame(&mut self, frame_id: FrameId) {
+        self.frame_id = frame_id;
+        self.timers.reset();
+        self.samplers.reset();
+        self.inside_frame = true;
+    }
+
+    /// Ends any in-flight timer and sampler queries and closes the frame.
+    fn end_frame(&mut self) {
+        self.finish_timer();
+        self.finish_sampler();
+        self.inside_frame = false;
+    }
+
+    /// Ends the timer query that is in flight, if there is one.
+    fn finish_timer(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.timers.pending != 0 {
+            self.gl.end_query(gl::TIME_ELAPSED);
+            self.timers.pending = 0;
+        }
+    }
+
+    /// Ends the sampler query that is in flight, if there is one.
+    fn finish_sampler(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.samplers.pending != 0 {
+            self.gl.end_query(gl::SAMPLES_PASSED);
+            self.samplers.pending = 0;
+        }
+    }
+}
+
+impl<T: NamedTag> GpuFrameProfile<T> {
+    /// Starts timing a new section labelled by `tag`.
+    ///
+    /// Any timer query still in flight is ended first, since only one can
+    /// be pending at a time. A debug marker group named after the tag is
+    /// pushed, and a `TIME_ELAPSED` query is begun if an unused query
+    /// object is available. The returned handle owns the marker group.
+    fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
+        self.finish_timer();
+
+        let marker = GpuMarker::new(&self.gl, tag.get_label());
+
+        // `add` yields no query when timers are disabled or every query
+        // object is already used; the section then goes unmeasured.
+        if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
+            self.gl.begin_query(gl::TIME_ELAPSED, query);
+        }
+
+        GpuTimeQuery(marker)
+    }
+
+    /// Starts counting samples for a new section labelled by `tag`.
+    ///
+    /// Any sampler query still in flight is ended first. A
+    /// `SAMPLES_PASSED` query is begun if an unused query object is
+    /// available.
+    fn start_sampler(&mut self, tag: T) -> GpuSampleQuery {
+        self.finish_sampler();
+
+        // As with timers, nothing is recorded when no query object is free.
+        if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
+            self.gl.begin_query(gl::SAMPLES_PASSED, query);
+        }
+
+        GpuSampleQuery
+    }
+
+    /// Reads back the query results and returns them with the frame id.
+    ///
+    /// The recorded timers and samplers are moved out of this profile,
+    /// each filled in with the `QUERY_RESULT` of its query object. Must
+    /// not be called between `begin_frame` and `end_frame`.
+    fn build_samples(&mut self) -> (FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
+        debug_assert!(!self.inside_frame);
+        // Borrow only the `gl` field so the closures below do not hold
+        // `self` while the query sets are borrowed mutably.
+        let gl = &self.gl;
+
+        (
+            self.frame_id,
+            self.timers.take(|timer, query| {
+                timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+            }),
+            self.samplers.take(|sampler, query| {
+                sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+            }),
+        )
+    }
+}
+
+/// Deletes any GL query objects still owned by the profile.
+impl<T> Drop for GpuFrameProfile<T> {
+    fn drop(&mut self) {
+        self.disable_timers();
+        self.disable_samplers();
+    }
+}
+
+/// GPU profiler that cycles through a fixed set of per-frame profiles.
+pub struct GpuProfiler<T> {
+    /// GL interface used for debug markers.
+    gl: Rc<gl::Gl>,
+    /// The per-frame profiles, used in round-robin order.
+    frames: Vec<GpuFrameProfile<T>>,
+    /// Index into `frames` of the profile currently in use; advanced by
+    /// `end_frame`.
+    next_frame: usize,
+}
+
+impl<T> GpuProfiler<T> {
+    /// Creates a profiler with a fixed number of frame profiles, all of
+    /// them with timers and samplers disabled.
+    pub fn new(gl: Rc<gl::Gl>) -> Self {
+        const MAX_PROFILE_FRAMES: usize = 4;
+        let mut frames = Vec::with_capacity(MAX_PROFILE_FRAMES);
+        for _ in 0 .. MAX_PROFILE_FRAMES {
+            frames.push(GpuFrameProfile::new(Rc::clone(&gl)));
+        }
+
+        GpuProfiler {
+            frames,
+            next_frame: 0,
+            gl,
+        }
+    }
+
+    /// Allocates timer query objects on every frame profile.
+    pub fn enable_timers(&mut self) {
+        const MAX_TIMERS_PER_FRAME: i32 = 256;
+
+        for profile in self.frames.iter_mut() {
+            profile.enable_timers(MAX_TIMERS_PER_FRAME);
+        }
+    }
+
+    /// Releases the timer query objects of every frame profile.
+    pub fn disable_timers(&mut self) {
+        for profile in self.frames.iter_mut() {
+            profile.disable_timers();
+        }
+    }
+
+    /// Flips timers on or off, judging the current state by whether the
+    /// first frame profile owns any timer query objects.
+    pub fn toggle_timers_enabled(&mut self) {
+        let currently_enabled = !self.frames[0].timers.set.is_empty();
+        if currently_enabled {
+            self.disable_timers();
+        } else {
+            self.enable_timers();
+        }
+    }
+
+    /// Allocates sampler query objects on every frame profile, logging a
+    /// warning first when built for macOS.
+    pub fn enable_samplers(&mut self) {
+        const MAX_SAMPLERS_PER_FRAME: i32 = 16;
+        if cfg!(target_os = "macos") {
+            warn!("Expect OSX driver bugs related to sample queries")
+        }
+
+        for profile in self.frames.iter_mut() {
+            profile.enable_samplers(MAX_SAMPLERS_PER_FRAME);
+        }
+    }
+
+    /// Releases the sampler query objects of every frame profile.
+    pub fn disable_samplers(&mut self) {
+        for profile in self.frames.iter_mut() {
+            profile.disable_samplers();
+        }
+    }
+
+    /// Flips samplers on or off, judging the current state by whether the
+    /// first frame profile owns any sampler query objects.
+    pub fn toggle_samplers_enabled(&mut self) {
+        let currently_enabled = !self.frames[0].samplers.set.is_empty();
+        if currently_enabled {
+            self.disable_samplers();
+        } else {
+            self.enable_samplers();
+        }
+    }
+}
+
+impl<T: NamedTag> GpuProfiler<T> {
+    /// Returns the frame profile that `next_frame` currently points at.
+    fn active_frame(&mut self) -> &mut GpuFrameProfile<T> {
+        let index = self.next_frame;
+        &mut self.frames[index]
+    }
+
+    /// Reads back the results held by the current frame profile.
+    pub fn build_samples(&mut self) -> (FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
+        self.active_frame().build_samples()
+    }
+
+    /// Starts recording `frame_id` into the current frame profile.
+    pub fn begin_frame(&mut self, frame_id: FrameId) {
+        self.active_frame().begin_frame(frame_id);
+    }
+
+    /// Closes the current frame profile and moves on to the next one,
+    /// wrapping around after the last.
+    pub fn end_frame(&mut self) {
+        self.active_frame().end_frame();
+        let frame_count = self.frames.len();
+        self.next_frame = (self.next_frame + 1) % frame_count;
+    }
+
+    /// Starts a timer labelled by `tag` on the current frame profile.
+    pub fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
+        self.active_frame().start_timer(tag)
+    }
+
+    /// Starts a sampler labelled by `tag` on the current frame profile.
+    pub fn start_sampler(&mut self, tag: T) -> GpuSampleQuery {
+        self.active_frame().start_sampler(tag)
+    }
+
+    /// Ends the sampler query in flight on the current frame profile,
+    /// consuming the token returned by `start_sampler`.
+    pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) {
+        self.active_frame().finish_sampler()
+    }
+
+    /// Opens a debug marker group that lasts until the returned marker
+    /// is dropped.
+    pub fn start_marker(&mut self, label: &str) -> GpuMarker {
+        GpuMarker::new(&self.gl, label)
+    }
+
+    /// Emits a single debug event marker.
+    pub fn place_marker(&mut self, label: &str) {
+        GpuMarker::fire(&self.gl, label)
+    }
+}
+
+/// A scoped GPU debug marker group; the group is popped when the marker
+/// is dropped.
+#[must_use]
+pub struct GpuMarker {
+    /// GL interface to pop the group on, or `None` when no group was pushed.
+    gl: Option<Rc<gl::Gl>>,
+}
+
+impl GpuMarker {
+    /// Pushes a debug marker group named `message` and returns a guard
+    /// for it. On anything other than `GlType::Gl` nothing is pushed and
+    /// the guard is inert.
+    fn new(gl: &Rc<gl::Gl>, message: &str) -> Self {
+        if gl.get_type() != gl::GlType::Gl {
+            return GpuMarker { gl: None };
+        }
+
+        gl.push_group_marker_ext(message);
+        GpuMarker { gl: Some(Rc::clone(gl)) }
+    }
+
+    /// Inserts a single debug event marker named `message`; does nothing
+    /// on anything other than `GlType::Gl`.
+    fn fire(gl: &Rc<gl::Gl>, message: &str) {
+        if gl.get_type() != gl::GlType::Gl {
+            return;
+        }
+
+        gl.insert_event_marker_ext(message);
+    }
+}
+
+/// Pops the debug marker group, if one was pushed when the marker was
+/// created.
+impl Drop for GpuMarker {
+    fn drop(&mut self) {
+        // `gl` is only set when a group was actually pushed.
+        if let Some(ref gl) = self.gl {
+            gl.pop_group_marker_ext();
+        }
+    }
+}
+
+/// Handle returned when a timer is started. It owns the debug marker
+/// group of the timed section, which is popped when the handle is dropped.
+#[must_use]
+pub struct GpuTimeQuery(GpuMarker);
+/// Token returned when a sampler is started; hand it back to
+/// `GpuProfiler::finish_sampler` to end the sampler query.
+#[must_use]
+pub struct GpuSampleQuery;
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -88,16 +88,17 @@ impl Document {
         let accumulated_scale_factor = self.accumulated_scale_factor();
         self.frame_builder = self.frame_ctx.create(
             self.frame_builder.take(),
             &self.scene,
             resource_cache,
             self.window_size,
             self.inner_rect,
             accumulated_scale_factor,
+            &self.output_pipelines,
         );
     }
 
     fn render(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         resource_profile: &mut ResourceProfileCounters,
@@ -111,19 +112,19 @@ impl Document {
             Some(ref mut builder) => {
                 self.frame_ctx.build_renderer_frame(
                     builder,
                     resource_cache,
                     gpu_cache,
                     &self.scene.pipelines,
                     accumulated_scale_factor,
                     pan,
-                    &self.output_pipelines,
                     &mut resource_profile.texture_cache,
                     &mut resource_profile.gpu_cache,
+                    &self.scene.properties,
                 )
             }
             None => {
                 self.frame_ctx.get_renderer_frame()
             }
         }
     }
 }
@@ -405,29 +406,18 @@ impl RenderBackend {
                 profile_scope!("GetScrollNodeState");
                 tx.send(doc.frame_ctx.get_scroll_node_state()).unwrap();
                 DocumentOp::Nop
             }
             DocumentMsg::GenerateFrame(property_bindings) => {
                 profile_scope!("GenerateFrame");
                 let _timer = profile_counters.total_time.timer();
 
-                // Ideally, when there are property bindings present,
-                // we won't need to rebuild the entire frame here.
-                // However, to avoid conflicts with the ongoing work to
-                // refactor how scroll roots + transforms work, this
-                // just rebuilds the frame if there are animated property
-                // bindings present for now.
-                // TODO(gw): Once the scrolling / reference frame changes
-                //           are completed, optimize the internals of
-                //           animated properties to not require a full
-                //           rebuild of the frame!
                 if let Some(property_bindings) = property_bindings {
                     doc.scene.properties.set_properties(property_bindings);
-                    doc.build_scene(&mut self.resource_cache);
                 }
 
                 if let Some(ref mut ros) = doc.render_on_scroll {
                     *ros = true;
                 }
 
                 if doc.scene.root_pipeline_id.is_some() {
                     let frame = doc.render(
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,25 +1,23 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use api::{FilterOp, LayerPoint, LayerRect, MixBlendMode};
+use api::{LayerPoint, LayerRect};
 use api::{PipelineId, PremultipliedColorF};
 use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
-use gpu_cache::GpuCacheHandle;
 use gpu_types::{ClipScrollNodeIndex};
-use internal_types::HardwareCompositeOp;
-use prim_store::PrimitiveIndex;
+use prim_store::{PrimitiveIndex};
 use std::{cmp, usize, f32, i32};
 use std::rc::Rc;
 use tiling::{RenderPass, RenderTargetIndex};
-use tiling::{RenderTargetKind, StackingContextIndex};
+use tiling::{RenderTargetKind};
 
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 pub const MAX_BLUR_STD_DEVIATION: f32 = 4.0;
 pub const MIN_DOWNSCALING_RT_SIZE: i32 = 128;
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub struct RenderTaskId(pub u32); // TODO(gw): Make private when using GPU cache!
 
@@ -148,41 +146,19 @@ pub enum RenderTaskKey {
 
 #[derive(Debug)]
 pub enum RenderTaskLocation {
     Fixed,
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
 }
 
 #[derive(Debug)]
-pub enum AlphaRenderItem {
-    Primitive(ClipScrollNodeIndex, ClipScrollNodeIndex, PrimitiveIndex, i32),
-    Blend(StackingContextIndex, RenderTaskId, FilterOp, i32),
-    Composite(
-        StackingContextIndex,
-        RenderTaskId,
-        RenderTaskId,
-        MixBlendMode,
-        i32,
-    ),
-    SplitComposite(StackingContextIndex, RenderTaskId, GpuCacheHandle, i32),
-    HardwareComposite(
-        StackingContextIndex,
-        RenderTaskId,
-        HardwareCompositeOp,
-        DeviceIntPoint,
-        i32,
-        DeviceIntSize,
-    ),
-}
-
-#[derive(Debug)]
 pub struct AlphaRenderTask {
     pub screen_origin: DeviceIntPoint,
-    pub items: Vec<AlphaRenderItem>,
+    pub prim_index: PrimitiveIndex,
     // If this render task is a registered frame output, this
     // contains the pipeline ID it maps to.
     pub frame_output_pipeline_id: Option<PipelineId>,
 }
 
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub enum MaskSegment {
@@ -307,40 +283,53 @@ pub struct RenderTask {
     pub cache_key: Option<RenderTaskKey>,
     pub location: RenderTaskLocation,
     pub children: Vec<RenderTaskId>,
     pub kind: RenderTaskKind,
     pub clear_mode: ClearMode,
 }
 
 impl RenderTask {
+    // TODO(gw): In the future we'll remove this
+    //           completely and convert everything
+    //           that is an alpha task to a Picture.
     pub fn new_alpha_batch(
         screen_origin: DeviceIntPoint,
         location: RenderTaskLocation,
+        prim_index: PrimitiveIndex,
         frame_output_pipeline_id: Option<PipelineId>,
+        children: Vec<RenderTaskId>,
     ) -> Self {
         RenderTask {
             cache_key: None,
-            children: Vec::new(),
+            children,
             location,
             kind: RenderTaskKind::Alpha(AlphaRenderTask {
                 screen_origin,
-                items: Vec::new(),
+                prim_index,
                 frame_output_pipeline_id,
             }),
             clear_mode: ClearMode::Transparent,
         }
     }
 
     pub fn new_dynamic_alpha_batch(
         rect: &DeviceIntRect,
+        prim_index: PrimitiveIndex,
         frame_output_pipeline_id: Option<PipelineId>,
+        children: Vec<RenderTaskId>,
     ) -> Self {
         let location = RenderTaskLocation::Dynamic(None, rect.size);
-        Self::new_alpha_batch(rect.origin, location, frame_output_pipeline_id)
+        Self::new_alpha_batch(
+            rect.origin,
+            location,
+            prim_index,
+            frame_output_pipeline_id,
+            children,
+        )
     }
 
     pub fn new_picture(
         size: DeviceIntSize,
         prim_index: PrimitiveIndex,
         target_kind: RenderTargetKind,
         content_origin: LayerPoint,
         color: PremultipliedColorF,
@@ -548,29 +537,16 @@ impl RenderTask {
             kind: RenderTaskKind::Scaling(target_kind),
             clear_mode: match target_kind {
                 RenderTargetKind::Color => ClearMode::Transparent,
                 RenderTargetKind::Alpha => ClearMode::One,
             },
         }
     }
 
-    pub fn as_alpha_batch_mut<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
-        match self.kind {
-            RenderTaskKind::Alpha(ref mut task) => task,
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::CacheMask(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Alias(..) |
-            RenderTaskKind::Scaling(..) => unreachable!(),
-        }
-    }
-
     pub fn as_alpha_batch<'a>(&'a self) -> &'a AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref task) => task,
             RenderTaskKind::Picture(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
@@ -688,59 +664,48 @@ impl RenderTask {
                         0.0,
                     ],
                 }
             }
             RenderTaskKind::Alias(..) => RenderTaskData { data: [0.0; 12] },
         }
     }
 
-    pub fn inflate(&mut self, device_radius: i32) {
-        match self.kind {
-            RenderTaskKind::Alpha(ref mut info) => {
-                match self.location {
-                    RenderTaskLocation::Fixed => {
-                        panic!("bug: inflate only supported for dynamic tasks");
-                    }
-                    RenderTaskLocation::Dynamic(_, ref mut size) => {
-                        size.width += device_radius * 2;
-                        size.height += device_radius * 2;
-                        info.screen_origin.x -= device_radius;
-                        info.screen_origin.y -= device_radius;
-                    }
-                }
-            }
-
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::CacheMask(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::Alias(..) |
-            RenderTaskKind::Scaling(..) => {
-                panic!("bug: inflate only supported for alpha tasks");
-            }
-        }
-    }
-
     pub fn get_dynamic_size(&self) -> DeviceIntSize {
         match self.location {
             RenderTaskLocation::Fixed => DeviceIntSize::zero(),
             RenderTaskLocation::Dynamic(_, size) => size,
         }
     }
 
     pub fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
         match self.location {
-            RenderTaskLocation::Fixed => (DeviceIntRect::zero(), RenderTargetIndex(0)),
-            RenderTaskLocation::Dynamic(origin_and_target_index, size) => {
-                let (origin, target_index) =
-                    origin_and_target_index.expect("Should have been allocated by now!");
+            RenderTaskLocation::Fixed => {
+                (DeviceIntRect::zero(), RenderTargetIndex(0))
+            }
+            // Previously, we only added render tasks after the entire
+            // primitive chain was determined visible. This meant that
+            // we could assert any render task in the list was also
+            // allocated (assigned to passes). Now, we add render
+            // tasks earlier, and the picture they belong to may be
+            // culled out later, so we can't assert that the task
+            // has been allocated.
+            // Render tasks that are created but not assigned to
+            // passes consume a row in the render task texture, but
+            // don't allocate any space in render targets nor
+            // draw any pixels.
+            // TODO(gw): Consider some kind of tag or other method
+            //           to mark a task as unused explicitly. This
+            //           would allow us to restore this debug check.
+            RenderTaskLocation::Dynamic(Some((origin, target_index)), size) => {
                 (DeviceIntRect::new(origin, size), target_index)
             }
+            RenderTaskLocation::Dynamic(None, _) => {
+                (DeviceIntRect::zero(), RenderTargetIndex(0))
+            }
         }
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::Readback(..) => RenderTargetKind::Color,
 
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -19,33 +19,34 @@ use api::{YuvColorSpace, YuvFormat};
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
 use api::channel::MsgSender;
 use debug_colors;
 use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
-use device::{DepthFunction, Device, FrameId, GpuMarker, GpuProfiler, Program, Texture,
+use device::{DepthFunction, Device, FrameId, Program, Texture,
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
-use device::{FileWatcherHandler, GpuTimer, ShaderError, TextureFilter, TextureTarget,
+use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
              VertexUsageHint, VAO};
 use euclid::{rect, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
 use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::{CacheTextureId, FastHashMap, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{DebugOutput, RenderTargetMode, TextureUpdateList, TextureUpdateSource};
 use profiler::{BackendProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
+use query::{GpuProfiler, GpuTimer};
 use rayon::Configuration as ThreadPoolConfig;
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use render_task::RenderTaskTree;
 #[cfg(feature = "debugger")]
 use serde_json;
 use std;
@@ -550,40 +551,35 @@ impl SourceTextureResolver {
 
         for texture in self.cache_texture_map {
             device.delete_texture(texture);
         }
     }
 
     fn end_pass(
         &mut self,
-        pass_index: usize,
-        pass_count: usize,
-        mut a8_texture: Option<Texture>,
-        mut rgba8_texture: Option<Texture>,
+        is_last: bool,
+        a8_texture: Option<Texture>,
+        rgba8_texture: Option<Texture>,
         a8_pool: &mut Vec<Texture>,
         rgba8_pool: &mut Vec<Texture>,
     ) {
         // If we have cache textures from previous pass, return them to the pool.
         rgba8_pool.extend(self.cache_rgba8_texture.take());
         a8_pool.extend(self.cache_a8_texture.take());
 
-        if pass_index == pass_count - 1 {
+        if is_last {
             // On the last pass, return the textures from this pass to the pool.
-            if let Some(texture) = rgba8_texture.take() {
-                rgba8_pool.push(texture);
-            }
-            if let Some(texture) = a8_texture.take() {
-                a8_pool.push(texture);
-            }
+            rgba8_pool.extend(rgba8_texture);
+            a8_pool.extend(a8_texture);
         } else {
             // We have another pass to process, make these textures available
             // as inputs to the next pass.
-            self.cache_rgba8_texture = rgba8_texture.take();
-            self.cache_a8_texture = a8_texture.take();
+            self.cache_rgba8_texture = rgba8_texture;
+            self.cache_a8_texture = a8_texture;
         }
     }
 
     // Bind a source texture to the device.
     fn bind(&self, texture_id: &SourceTexture, sampler: TextureSampler, device: &mut Device) {
         match *texture_id {
             SourceTexture::Invalid => {}
             SourceTexture::CacheA8 => {
@@ -1833,18 +1829,17 @@ impl Renderer {
                 backend.run(backend_profile_counters);
                 if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                     thread_listener.thread_stopped(&thread_name);
                 }
             })
         };
 
         let gpu_cache_texture = CacheTexture::new(&mut device);
-
-        let gpu_profile = GpuProfiler::new(device.rc_gl());
+        let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
 
         let renderer = Renderer {
             result_rx,
             debug_server,
             device,
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
@@ -2148,25 +2143,40 @@ impl Renderer {
             } else {
                 self.debug_flags.remove(DebugFlags::RENDER_TARGET_DBG);
             },
             DebugCommand::EnableAlphaRectsDebug(enable) => if enable {
                 self.debug_flags.insert(DebugFlags::ALPHA_PRIM_DBG);
             } else {
                 self.debug_flags.remove(DebugFlags::ALPHA_PRIM_DBG);
             },
+            DebugCommand::EnableGpuTimeQueries(enable) => if enable {
+                self.gpu_profile.enable_timers();
+            } else {
+                self.gpu_profile.disable_timers();
+            },
+            DebugCommand::EnableGpuSampleQueries(enable) => if enable {
+                self.gpu_profile.enable_samplers();
+            } else {
+                self.gpu_profile.disable_samplers();
+            },
             DebugCommand::FetchDocuments => {}
             DebugCommand::FetchClipScrollTree => {}
             DebugCommand::FetchPasses => {
                 let json = self.get_passes_for_debugger();
                 self.debug_server.send(json);
             }
         }
     }
 
+    /// Toggles GPU timer queries and GPU sample queries. Each kind is
+    /// flipped according to its own current state.
+    pub fn toggle_queries_enabled(&mut self) {
+        self.gpu_profile.toggle_timers_enabled();
+        self.gpu_profile.toggle_samplers_enabled();
+    }
+
     /// Set a callback for handling external images.
     pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
         self.external_image_handler = Some(handler);
     }
 
     /// Set a callback for handling external outputs.
     pub fn set_output_image_handler(&mut self, handler: Box<OutputImageHandler>) {
         self.output_image_handler = Some(handler);
@@ -2184,40 +2194,36 @@ impl Renderer {
     /// A Frame is supplied by calling [`generate_frame()`][genframe].
     /// [genframe]: ../../webrender_api/struct.DocumentApi.html#method.generate_frame
     pub fn render(&mut self, framebuffer_size: DeviceUintSize) -> Result<(), Vec<RendererError>> {
         profile_scope!("render");
 
         if let Some(mut frame) = self.current_frame.take() {
             if let Some(ref mut frame) = frame.frame {
                 let mut profile_timers = RendererProfileTimers::new();
-                let mut profile_samplers = Vec::new();
-
-                {
-                    //Note: avoiding `self.gpu_profile.add_marker` - it would block here
-                    let _gm = GpuMarker::new(self.device.rc_gl(), "build samples");
+                let profile_samplers = {
+                    let _gm = self.gpu_profile.start_marker("build samples");
                     // Block CPU waiting for last frame's GPU profiles to arrive.
                     // In general this shouldn't block unless heavily GPU limited.
-                    if let Some((gpu_frame_id, timers, samplers)) = self.gpu_profile.build_samples()
-                    {
-                        if self.max_recorded_profiles > 0 {
-                            while self.gpu_profiles.len() >= self.max_recorded_profiles {
-                                self.gpu_profiles.pop_front();
-                            }
-                            self.gpu_profiles
-                                .push_back(GpuProfile::new(gpu_frame_id, &timers));
+                    let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples();
+
+                    if self.max_recorded_profiles > 0 {
+                        while self.gpu_profiles.len() >= self.max_recorded_profiles {
+                            self.gpu_profiles.pop_front();
                         }
-                        profile_timers.gpu_samples = timers;
-                        profile_samplers = samplers;
+                        self.gpu_profiles
+                            .push_back(GpuProfile::new(gpu_frame_id, &timers));
                     }
-                }
+                    profile_timers.gpu_samples = timers;
+                    samplers
+                };
 
                 let cpu_frame_id = profile_timers.cpu_time.profile(|| {
                     let cpu_frame_id = {
-                        let _gm = GpuMarker::new(self.device.rc_gl(), "begin frame");
+                        let _gm = self.gpu_profile.start_marker("begin frame");
                         let frame_id = self.device.begin_frame(frame.device_pixel_ratio);
                         self.gpu_profile.begin_frame(frame_id);
 
                         self.device.disable_scissor();
                         self.device.disable_depth();
                         self.device.set_blend(false);
                         //self.update_shaders();
 
@@ -2252,40 +2258,43 @@ impl Renderer {
                         self.backend_profile_counters.total_time.get(),
                         profile_timers.cpu_time.get(),
                         self.profile_counters.draw_calls.get(),
                     );
                     self.cpu_profiles.push_back(cpu_profile);
                 }
 
                 if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
+                    let _gm = self.gpu_profile.start_marker("profile");
                     let screen_fraction = 1.0 / //TODO: take device/pixel ratio into equation?
                         (framebuffer_size.width as f32 * framebuffer_size.height as f32);
                     self.profiler.draw_profile(
-                        &mut self.device,
                         &frame.profile_counters,
                         &self.backend_profile_counters,
                         &self.profile_counters,
                         &mut profile_timers,
                         &profile_samplers,
                         screen_fraction,
                         &mut self.debug,
                     );
                 }
 
                 self.profile_counters.reset();
                 self.profile_counters.frame_counter.inc();
 
-                let debug_size = DeviceUintSize::new(
-                    framebuffer_size.width as u32,
-                    framebuffer_size.height as u32,
-                );
-                self.debug.render(&mut self.device, &debug_size);
                 {
-                    let _gm = GpuMarker::new(self.device.rc_gl(), "end frame");
+                    let _gm = self.gpu_profile.start_marker("debug");
+                    let debug_size = DeviceUintSize::new(
+                        framebuffer_size.width as u32,
+                        framebuffer_size.height as u32,
+                    );
+                    self.debug.render(&mut self.device, &debug_size);
+                }
+                {
+                    let _gm = self.gpu_profile.start_marker("end frame");
                     self.device.end_frame();
                 }
                 self.last_time = current_time;
             }
 
             // Restore frame - avoid borrow checker!
             self.current_frame = Some(frame);
         }
@@ -2299,27 +2308,27 @@ impl Renderer {
     pub fn layers_are_bouncing_back(&self) -> bool {
         match self.current_frame {
             None => false,
             Some(ref current_frame) => !current_frame.layers_bouncing_back.is_empty(),
         }
     }
 
     fn update_gpu_cache(&mut self, frame: &mut Frame) {
-        let _gm = GpuMarker::new(self.device.rc_gl(), "gpu cache update");
+        let _gm = self.gpu_profile.start_marker("gpu cache update");
         for update_list in self.pending_gpu_cache_updates.drain(..) {
             self.gpu_cache_texture
                 .update(&mut self.device, &update_list);
         }
         self.update_deferred_resolves(frame);
         self.gpu_cache_texture.flush(&mut self.device);
     }
 
     fn update_texture_cache(&mut self) {
-        let _gm = GpuMarker::new(self.device.rc_gl(), "texture cache update");
+        let _gm = self.gpu_profile.start_marker("texture cache update");
         let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
 
         for update_list in pending_texture_updates.drain(..) {
             for update in update_list.updates {
                 match update.op {
                     TextureUpdateOp::Create {
                         width,
                         height,
@@ -2706,17 +2715,17 @@ impl Renderer {
 
                 // Restore draw target to current pass render target + layer.
                 self.device
                     .bind_draw_target(render_target, Some(target_dimensions));
             }
             _ => {}
         }
 
-        let _gm = self.gpu_profile.add_marker(marker);
+        let _timer = self.gpu_profile.start_timer(marker);
         self.draw_instanced_batch(instances, VertexArrayKind::Primitive, &key.textures);
     }
 
     fn handle_scaling(
         &mut self,
         render_tasks: &RenderTaskTree,
         scalings: &Vec<ScalingInfo>,
         source: SourceTexture,
@@ -2745,17 +2754,17 @@ impl Renderer {
         target: &ColorRenderTarget,
         target_size: DeviceUintSize,
         clear_color: Option<[f32; 4]>,
         render_tasks: &RenderTaskTree,
         projection: &Transform3D<f32>,
         frame_id: FrameId,
     ) {
         {
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
             self.device
                 .bind_draw_target(render_target, Some(target_size));
             self.device.disable_depth();
             self.device.enable_depth_write();
             self.device.set_blend(false);
             match render_target {
                 Some(..) if self.enable_clear_scissor => {
                     // TODO(gw): Applying a scissor rect and minimal clear here
@@ -2776,17 +2785,17 @@ impl Renderer {
 
         // Draw any blurs for this target.
         // Blurs are rendered as a standard 2-pass
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
             self.device.set_blend(false);
             self.cs_blur_rgba8
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
 
             if !target.vertical_blurs.is_empty() {
                 self.draw_instanced_batch(
                     &target.vertical_blurs,
@@ -2811,51 +2820,51 @@ impl Renderer {
         // for shadow support. In the future it may be worth
         // considering using this for (some) other text runs, since
         // it removes the overhead of submitting many small glyphs
         // to multiple tiles in the normal text run case.
         if !target.text_run_cache_prims.is_empty() {
             self.device.set_blend(true);
             self.device.set_blend_mode_premultiplied_alpha();
 
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_TEXT_RUN);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_TEXT_RUN);
             self.cs_text_run
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             for (texture_id, instances) in &target.text_run_cache_prims {
                 self.draw_instanced_batch(
                     instances,
                     VertexArrayKind::Primitive,
                     &BatchTextures::color(*texture_id),
                 );
             }
         }
         if !target.line_cache_prims.is_empty() {
             // TODO(gw): Technically, we don't need blend for solid
             //           lines. We could check that here?
             self.device.set_blend(true);
             self.device.set_blend_mode_premultiplied_alpha();
 
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_LINE);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_LINE);
             self.cs_line
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
                 &target.line_cache_prims,
                 VertexArrayKind::Primitive,
                 &BatchTextures::no_texture(),
             );
         }
 
         //TODO: record the pixel count for cached primitives
 
         if !target.alpha_batcher.is_empty() {
-            let _gm2 = GpuMarker::new(self.device.rc_gl(), "alpha batches");
+            let _gl = self.gpu_profile.start_marker("alpha batches");
             self.device.set_blend(false);
             let mut prev_blend_mode = BlendMode::None;
 
-            self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_OPAQUE);
+            let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
 
             //Note: depth equality is needed for split planes
             self.device.set_depth_func(DepthFunction::LessEqual);
             self.device.enable_depth();
             self.device.enable_depth_write();
 
             // Draw opaque batches front-to-back for maximum
             // z-buffer efficiency!
@@ -2873,17 +2882,18 @@ impl Renderer {
                     &projection,
                     render_tasks,
                     render_target,
                     target_size,
                 );
             }
 
             self.device.disable_depth_write();
-            self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.gpu_profile.finish_sampler(opaque_sampler);
+            let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
 
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
                 if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => debug_colors::BLACK,
                         BlendMode::PremultipliedAlpha => debug_colors::GREY,
                         BlendMode::PremultipliedDestOut => debug_colors::SALMON,
                         BlendMode::SubpixelConstantTextColor(..) => debug_colors::GREEN,
@@ -2899,17 +2909,17 @@ impl Renderer {
                     BatchKind::Transformable(transform_kind, TransformBatchKind::TextRun(glyph_format)) => {
                         // Text run batches are handled by this special case branch.
                         // In the case of subpixel text, we draw it as a two pass
                         // effect, to ensure we can apply clip masks correctly.
                         // In the future, there are several optimizations available:
                         // 1) Use dual source blending where available (almost all recent hardware).
                         // 2) Use frame buffer fetch where available (most modern hardware).
                         // 3) Consider the old constant color blend method where no clip is applied.
-                        let _gm = self.gpu_profile.add_marker(GPU_TAG_PRIM_TEXT_RUN);
+                        let _timer = self.gpu_profile.start_timer(GPU_TAG_PRIM_TEXT_RUN);
 
                         self.device.set_blend(true);
 
                         match batch.key.blend_mode {
                             BlendMode::PremultipliedAlpha => {
                                 self.device.set_blend_mode_premultiplied_alpha();
 
                                 self.ps_text_run.bind(
@@ -3073,17 +3083,17 @@ impl Renderer {
                             target_size,
                         );
                     }
                 }
             }
 
             self.device.disable_depth();
             self.device.set_blend(false);
-            self.gpu_profile.done_sampler();
+            self.gpu_profile.finish_sampler(transparent_sampler);
         }
 
         // For any registered image outputs on this render target,
         // get the texture from caller and blit it.
         for output in &target.outputs {
             let handler = self.output_image_handler
                 .as_mut()
                 .expect("Found output image, but no handler set!");
@@ -3118,20 +3128,20 @@ impl Renderer {
     fn draw_alpha_target(
         &mut self,
         render_target: (&Texture, i32),
         target: &AlphaRenderTarget,
         target_size: DeviceUintSize,
         projection: &Transform3D<f32>,
         render_tasks: &RenderTaskTree,
     ) {
-        self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_ALPHA);
+        let alpha_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_ALPHA);
 
         {
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
             self.device
                 .bind_draw_target(Some(render_target), Some(target_size));
             self.device.disable_depth();
             self.device.disable_depth_write();
 
             // TODO(gw): Applying a scissor rect and minimal clear here
             // is a very large performance win on the Intel and nVidia
             // GPUs that I have tested with. It's possible it may be a
@@ -3152,17 +3162,17 @@ impl Renderer {
 
         // Draw any blurs for this target.
         // Blurs are rendered as a standard 2-pass
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
 
             self.device.set_blend(false);
             self.cs_blur_a8
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
 
             if !target.vertical_blurs.is_empty() {
                 self.draw_instanced_batch(
                     &target.vertical_blurs,
@@ -3180,61 +3190,61 @@ impl Renderer {
             }
         }
 
         self.handle_scaling(render_tasks, &target.scalings, SourceTexture::CacheA8);
 
         if !target.brush_mask_corners.is_empty() {
             self.device.set_blend(false);
 
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_BRUSH_MASK);
             self.brush_mask_corner
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
                 &target.brush_mask_corners,
                 VertexArrayKind::Primitive,
                 &BatchTextures::no_texture(),
             );
         }
 
         if !target.brush_mask_rounded_rects.is_empty() {
             self.device.set_blend(false);
 
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_BRUSH_MASK);
             self.brush_mask_rounded_rect
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
                 &target.brush_mask_rounded_rects,
                 VertexArrayKind::Primitive,
                 &BatchTextures::no_texture(),
             );
         }
 
         // Draw the clip items into the tiled alpha mask.
         {
-            let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
+            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
 
             // If we have border corner clips, the first step is to clear out the
             // area in the clip mask. This allows drawing multiple invididual clip
             // in regions below.
             if !target.clip_batcher.border_clears.is_empty() {
-                let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders [clear]");
+                let _gm = self.gpu_profile.start_marker("clip borders [clear]");
                 self.device.set_blend(false);
                 self.cs_clip_border
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     &target.clip_batcher.border_clears,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                 );
             }
 
             // Draw any dots or dashes for border corners.
             if !target.clip_batcher.borders.is_empty() {
-                let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders");
+                let _gm = self.gpu_profile.start_marker("clip borders");
                 // We are masking in parts of the corner (dots or dashes) here.
                 // Blend mode is set to max to allow drawing multiple dots.
                 // The individual dots and dashes in a border never overlap, so using
                 // a max blend mode here is fine.
                 self.device.set_blend(true);
                 self.device.set_blend_mode_max();
                 self.cs_clip_border
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
@@ -3246,60 +3256,60 @@ impl Renderer {
             }
 
             // switch to multiplicative blending
             self.device.set_blend(true);
             self.device.set_blend_mode_multiply();
 
             // draw rounded cornered rectangles
             if !target.clip_batcher.rectangles.is_empty() {
-                let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip rectangles");
+                let _gm = self.gpu_profile.start_marker("clip rectangles");
                 self.cs_clip_rectangle.bind(
                     &mut self.device,
                     projection,
                     0,
                     &mut self.renderer_errors,
                 );
                 self.draw_instanced_batch(
                     &target.clip_batcher.rectangles,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                 );
             }
             // draw image masks
             for (mask_texture_id, items) in target.clip_batcher.images.iter() {
-                let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip images");
+                let _gm = self.gpu_profile.start_marker("clip images");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
                         SourceTexture::Invalid,
                         SourceTexture::Invalid,
                     ],
                 };
                 self.cs_clip_image
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 self.draw_instanced_batch(items, VertexArrayKind::Clip, &textures);
             }
         }
 
-        self.gpu_profile.done_sampler();
+        self.gpu_profile.finish_sampler(alpha_sampler);
     }
 
     fn update_deferred_resolves(&mut self, frame: &mut Frame) {
         // The first thing we do is run through any pending deferred
         // resolves, and use a callback to get the UV rect for this
         // custom item. Then we patch the resource_rects structure
         // here before it's uploaded to the GPU.
         if !frame.deferred_resolves.is_empty() {
             let handler = self.external_image_handler
                 .as_mut()
                 .expect("Found external image, but no handler set!");
 
             for deferred_resolve in &frame.deferred_resolves {
-                GpuMarker::fire(self.device.gl(), "deferred resolve");
+                self.gpu_profile.place_marker("deferred resolve");
                 let props = &deferred_resolve.image_properties;
                 let ext_image = props
                     .external_image
                     .expect("BUG: Deferred resolves must be external images!");
                 let image = handler.lock(ext_image.id, ext_image.channel_index);
                 let texture_target = match ext_image.image_type {
                     ExternalImageType::Texture2DHandle => TextureTarget::Default,
                     ExternalImageType::Texture2DArrayHandle => TextureTarget::Array,
@@ -3360,17 +3370,17 @@ impl Renderer {
 
             for (ext_data, _) in self.texture_resolver.external_images.drain() {
                 handler.unlock(ext_data.0, ext_data.1);
             }
         }
     }
 
     fn start_frame(&mut self, frame: &mut Frame) {
-        let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_DATA);
+        let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
 
         // Assign render targets to the passes.
         for pass in &mut frame.passes {
             debug_assert!(pass.color_texture.is_none());
             debug_assert!(pass.alpha_texture.is_none());
 
             if pass.needs_render_target_kind(RenderTargetKind::Color) {
                 pass.color_texture = Some(
@@ -3442,34 +3452,37 @@ impl Renderer {
     }
 
     fn draw_tile_frame(
         &mut self,
         frame: &mut Frame,
         framebuffer_size: DeviceUintSize,
         frame_id: FrameId,
     ) {
-        let _gm = GpuMarker::new(self.device.rc_gl(), "tile frame draw");
+        let _gm = self.gpu_profile.start_marker("tile frame draw");
 
         // Some tests use a restricted viewport smaller than the main screen size.
         // Ensure we clear the framebuffer in these tests.
         // TODO(gw): Find a better solution for this?
         let needs_clear = frame.window_size.width < framebuffer_size.width ||
             frame.window_size.height < framebuffer_size.height;
 
         self.device.disable_depth_write();
         self.device.disable_stencil();
         self.device.set_blend(false);
 
         if frame.passes.is_empty() {
             self.device
                 .clear_target(Some(self.clear_color.to_array()), Some(1.0));
         } else {
             self.start_frame(frame);
+
             let pass_count = frame.passes.len();
+            let base_color_target_count = self.color_render_targets.len();
+            let base_alpha_target_count = self.alpha_render_targets.len();
 
             for (pass_index, pass) in frame.passes.iter_mut().enumerate() {
                 self.texture_resolver.bind(
                     &SourceTexture::CacheA8,
                     TextureSampler::CacheA8,
                     &mut self.device,
                 );
                 self.texture_resolver.bind(
@@ -3544,18 +3557,17 @@ impl Renderer {
                         clear_color,
                         &frame.render_tasks,
                         &projection,
                         frame_id,
                     );
                 }
 
                 self.texture_resolver.end_pass(
-                    pass_index,
-                    pass_count,
+                    pass_index == pass_count - 1,
                     pass.alpha_texture.take(),
                     pass.color_texture.take(),
                     &mut self.alpha_render_targets,
                     &mut self.color_render_targets,
                 );
 
                 // After completing the first pass, make the A8 target available as an
                 // input to any subsequent passes.
@@ -3564,18 +3576,18 @@ impl Renderer {
                         self.texture_resolver.resolve(&SourceTexture::CacheA8)
                     {
                         self.device
                             .bind_texture(TextureSampler::SharedCacheA8, shared_alpha_texture);
                     }
                 }
             }
 
-            self.color_render_targets.reverse();
-            self.alpha_render_targets.reverse();
+            self.color_render_targets[base_color_target_count..].reverse();
+            self.alpha_render_targets[base_alpha_target_count..].reverse();
             self.draw_render_target_debug(framebuffer_size);
             self.draw_texture_cache_debug(framebuffer_size);
 
             // Garbage collect any frame outputs that weren't used this frame.
             let device = &mut self.device;
             self.output_targets
                 .retain(|_, target| if target.last_access != frame_id {
                     device.delete_fbo(target.fbo_id);
--- a/gfx/webrender/src/scene.rs
+++ b/gfx/webrender/src/scene.rs
@@ -37,46 +37,49 @@ impl SceneProperties {
             self.float_properties
                 .insert(property.key.id, property.value);
         }
     }
 
     /// Get the current value for a transform property.
     pub fn resolve_layout_transform(
         &self,
-        property: Option<&PropertyBinding<LayoutTransform>>,
+        property: &PropertyBinding<LayoutTransform>,
     ) -> LayoutTransform {
-        let property = match property {
-            Some(property) => property,
-            None => return LayoutTransform::identity(),
-        };
-
         match *property {
-            PropertyBinding::Value(matrix) => matrix,
-            PropertyBinding::Binding(ref key) => self.transform_properties
-                .get(&key.id)
-                .cloned()
-                .unwrap_or_else(|| {
-                    warn!("Property binding {:?} has an invalid value.", key);
-                    LayoutTransform::identity()
-                }),
+            PropertyBinding::Value(value) => value,
+            PropertyBinding::Binding(ref key) => {
+                self.transform_properties
+                    .get(&key.id)
+                    .cloned()
+                    .unwrap_or_else(|| {
+                        warn!("Property binding {:?} has an invalid value.", key);
+                        LayoutTransform::identity()
+                    })
+            }
         }
     }
 
     /// Get the current value for a float property.
-    pub fn resolve_float(&self, property: &PropertyBinding<f32>, default_value: f32) -> f32 {
+    pub fn resolve_float(
+        &self,
+        property: &PropertyBinding<f32>,
+        default_value: f32
+    ) -> f32 {
         match *property {
             PropertyBinding::Value(value) => value,
-            PropertyBinding::Binding(ref key) => self.float_properties
-                .get(&key.id)
-                .cloned()
-                .unwrap_or_else(|| {
-                    warn!("Property binding {:?} has an invalid value.", key);
-                    default_value
-                }),
+            PropertyBinding::Binding(ref key) => {
+                self.float_properties
+                    .get(&key.id)
+                    .cloned()
+                    .unwrap_or_else(|| {
+                        warn!("Property binding {:?} has an invalid value.", key);
+                        default_value
+                    })
+            }
         }
     }
 }
 
 /// A representation of the layout within the display port for a given document or iframe.
 pub struct ScenePipeline {
     pub pipeline_id: PipelineId,
     pub epoch: Epoch,
@@ -136,79 +139,80 @@ impl Scene {
 
     pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
         if let Some(pipeline) = self.pipelines.get_mut(&pipeline_id) {
             pipeline.epoch = epoch;
         }
     }
 }
 
+/// An arbitrary number which we assume opacity is invisible below.
+pub const OPACITY_EPSILON: f32 = 0.001;
+
 pub trait FilterOpHelpers {
-    fn resolve(self, properties: &SceneProperties) -> FilterOp;
+    fn is_visible(&self) -> bool;
     fn is_noop(&self) -> bool;
 }
 
 impl FilterOpHelpers for FilterOp {
-    fn resolve(self, properties: &SceneProperties) -> FilterOp {
-        match self {
-            FilterOp::Opacity(ref value) => {
-                let amount = properties.resolve_float(value, 1.0);
-                FilterOp::Opacity(PropertyBinding::Value(amount))
+    fn is_visible(&self) -> bool {
+        match *self {
+            FilterOp::Blur(..) |
+            FilterOp::Brightness(..) |
+            FilterOp::Contrast(..) |
+            FilterOp::Grayscale(..) |
+            FilterOp::HueRotate(..) |
+            FilterOp::Invert(..) |
+            FilterOp::Saturate(..) |
+            FilterOp::Sepia(..) => true,
+            FilterOp::Opacity(_, amount) => {
+                amount > OPACITY_EPSILON
             }
-            _ => self,
         }
     }
 
     fn is_noop(&self) -> bool {
         match *self {
             FilterOp::Blur(length) => length == 0.0,
             FilterOp::Brightness(amount) => amount == 1.0,
             FilterOp::Contrast(amount) => amount == 1.0,
             FilterOp::Grayscale(amount) => amount == 0.0,
             FilterOp::HueRotate(amount) => amount == 0.0,
             FilterOp::Invert(amount) => amount == 0.0,
-            FilterOp::Opacity(value) => match value {
-                PropertyBinding::Value(amount) => amount == 1.0,
-                PropertyBinding::Binding(..) => {
-                    panic!("bug: binding value should be resolved");
-                }
-            },
+            FilterOp::Opacity(_, amount) => amount >= 1.0,
             FilterOp::Saturate(amount) => amount == 1.0,
             FilterOp::Sepia(amount) => amount == 0.0,
         }
     }
 }
 
 pub trait StackingContextHelpers {
     fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode>;
     fn filter_ops_for_compositing(
         &self,
         display_list: &BuiltDisplayList,
         input_filters: ItemRange<FilterOp>,
-        properties: &SceneProperties,
     ) -> Vec<FilterOp>;
 }
 
 impl StackingContextHelpers for StackingContext {
     fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode> {
         match self.mix_blend_mode {
             MixBlendMode::Normal => None,
             _ => Some(self.mix_blend_mode),
         }
     }
 
     fn filter_ops_for_compositing(
         &self,
         display_list: &BuiltDisplayList,
         input_filters: ItemRange<FilterOp>,
-        properties: &SceneProperties,
     ) -> Vec<FilterOp> {
+        // TODO(gw): Now that we resolve these later on,
+        //           we could probably make it a bit
+        //           more efficient than cloning these here.
         let mut filters = vec![];
         for filter in display_list.get(input_filters) {
-            let filter = filter.resolve(properties);
-            if filter.is_noop() {
-                continue;
-            }
             filters.push(filter);
         }
         filters
     }
 }
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,47 +1,96 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
-use api::{MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
-use api::{LayerVector2D, TileOffset, YuvColorSpace, YuvFormat};
+use api::{MixBlendMode, PipelineId};
+use api::{TileOffset, YuvColorSpace, YuvFormat};
+use api::{LayerToWorldTransform, WorldPixel};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
-use clip_scroll_tree::CoordinateSystemId;
+use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
 use device::Texture;
+use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use gpu_types::{BRUSH_FLAG_USES_PICTURE, ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
-use picture::PictureKind;
+use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
+use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, PrimitiveRun, RectangleContent};
 use profiler::FrameProfileCounters;
-use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
+use render_task::{ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
 use texture_allocator::GuillotineAllocator;
 use util::{MatrixHelpers, TransformedRectKind};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
 const MIN_TARGET_SIZE: u32 = 2048;
 
+// Helper to add an entire primitive run to a batch list.
+// TODO(gw): Restructure this so the param list isn't quite
+//           so daunting!
+impl PrimitiveRun {
+    fn add_to_batch(
+        &self,
+        clip_id: ClipScrollNodeIndex,
+        scroll_id: ClipScrollNodeIndex,
+        batch_list: &mut BatchList,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskTree,
+        task_id: RenderTaskId,
+        task_address: RenderTaskAddress,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+        glyph_fetch_buffer: &mut Vec<GlyphFetchResult>,
+        splitter: &mut BspSplitter<f64, WorldPixel>,
+    ) {
+        for i in 0 .. self.count {
+            let prim_index = PrimitiveIndex(self.base_prim_index.0 + i);
+
+            let md = &ctx.prim_store.cpu_metadata[prim_index.0];
+
+            // Now that we walk the primitive runs in order to add
+            // items to batches, we need to check if they are
+            // visible here.
+            if md.screen_rect.is_some() {
+                add_to_batch(
+                    clip_id,
+                    scroll_id,
+                    prim_index,
+                    batch_list,
+                    ctx,
+                    gpu_cache,
+                    render_tasks,
+                    task_id,
+                    task_address,
+                    deferred_resolves,
+                    glyph_fetch_buffer,
+                    splitter,
+                );
+            }
+        }
+    }
+}
+
 trait AlphaBatchHelpers {
     fn get_blend_mode(
         &self,
         metadata: &PrimitiveMetadata,
         transform_kind: TransformedRectKind,
     ) -> BlendMode;
 }
 
@@ -106,23 +155,16 @@ impl AlphaBatchHelpers for PrimitiveStor
 
 #[derive(Debug)]
 pub struct ScrollbarPrimitive {
     pub clip_id: ClipId,
     pub prim_index: PrimitiveIndex,
     pub frame_rect: LayerRect,
 }
 
-#[derive(Debug)]
-pub enum PrimitiveRunCmd {
-    PushStackingContext(StackingContextIndex),
-    PopStackingContext,
-    PrimitiveRun(PrimitiveRun),
-}
-
 #[derive(Debug, Copy, Clone)]
 pub struct RenderTargetIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub struct RenderPassIndex(isize);
 
 #[derive(Debug)]
 struct DynamicTaskInfo {
@@ -203,33 +245,53 @@ impl AlphaBatchList {
         let batch = &mut self.batches[selected_batch_index.unwrap()];
         batch.item_rects.push(*item_bounding_rect);
 
         &mut batch.instances
     }
 }
 
 pub struct OpaqueBatchList {
+    pub pixel_area_threshold_for_new_batch: i32,
     pub batches: Vec<OpaquePrimitiveBatch>,
 }
 
 impl OpaqueBatchList {
-    fn new() -> OpaqueBatchList {
+    fn new(pixel_area_threshold_for_new_batch: i32) -> OpaqueBatchList {
         OpaqueBatchList {
             batches: Vec::new(),
+            pixel_area_threshold_for_new_batch,
         }
     }
 
-    fn get_suitable_batch(&mut self, key: BatchKey) -> &mut Vec<PrimitiveInstance> {
+    fn get_suitable_batch(
+        &mut self,
+        key: BatchKey,
+        item_bounding_rect: &DeviceIntRect
+    ) -> &mut Vec<PrimitiveInstance> {
         let mut selected_batch_index = None;
+        let item_area = item_bounding_rect.size.area();
 
-        for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
-            if batch.key.is_compatible_with(&key) {
-                selected_batch_index = Some(batch_index);
-                break;
+        // If the area of this primitive is larger than the given threshold,
+        // then it is large enough to warrant breaking a batch for. In this
+        // case we just see if it can be added to the existing batch or
+        // create a new one.
+        if item_area > self.pixel_area_threshold_for_new_batch {
+            if let Some(ref batch) = self.batches.last() {
+                if batch.key.is_compatible_with(&key) {
+                    selected_batch_index = Some(self.batches.len() - 1);
+                }
+            }
+        } else {
+            // Otherwise, look back through a reasonable number of batches.
+            for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
+                if batch.key.is_compatible_with(&key) {
+                    selected_batch_index = Some(batch_index);
+                    break;
+                }
             }
         }
 
         if selected_batch_index.is_none() {
             let new_batch = OpaquePrimitiveBatch::new(key);
             selected_batch_index = Some(self.batches.len());
             self.batches.push(new_batch);
         }
@@ -253,30 +315,37 @@ impl OpaqueBatchList {
 }
 
 pub struct BatchList {
     pub alpha_batch_list: AlphaBatchList,
     pub opaque_batch_list: OpaqueBatchList,
 }
 
 impl BatchList {
-    fn new() -> BatchList {
+    fn new(screen_size: DeviceIntSize) -> BatchList {
+        // The area threshold handed to the opaque batch list is one
+        // quarter of the screen area (width * height / 4).
+        let batch_area_threshold = screen_size.width * screen_size.height / 4;
+
         BatchList {
             alpha_batch_list: AlphaBatchList::new(),
-            opaque_batch_list: OpaqueBatchList::new(),
+            opaque_batch_list: OpaqueBatchList::new(batch_area_threshold),
         }
     }
 
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         match key.blend_mode {
-            BlendMode::None => self.opaque_batch_list.get_suitable_batch(key),
+            BlendMode::None => {
+                self.opaque_batch_list
+                    .get_suitable_batch(key, item_bounding_rect)
+            }
             BlendMode::PremultipliedAlpha |
             BlendMode::PremultipliedDestOut |
             BlendMode::SubpixelConstantTextColor(..) |
             BlendMode::SubpixelVariableTextColor |
             BlendMode::SubpixelWithBgColor => {
                 self.alpha_batch_list
                     .get_suitable_batch(key, item_bounding_rect)
             }
@@ -290,514 +359,656 @@ impl BatchList {
 
 /// Encapsulates the logic of building batches for items that are blended.
 pub struct AlphaBatcher {
     pub batch_list: BatchList,
     tasks: Vec<RenderTaskId>,
     glyph_fetch_buffer: Vec<GlyphFetchResult>,
 }
 
-impl AlphaRenderItem {
-    fn add_to_batch(
-        &self,
-        batch_list: &mut BatchList,
-        ctx: &RenderTargetContext,
-        gpu_cache: &mut GpuCache,
-        render_tasks: &RenderTaskTree,
-        task_id: RenderTaskId,
-        task_address: RenderTaskAddress,
-        deferred_resolves: &mut Vec<DeferredResolve>,
-        glyph_fetch_buffer: &mut Vec<GlyphFetchResult>,
-    ) {
-        match *self {
-            AlphaRenderItem::Blend(stacking_context_index, src_id, filter, z) => {
-                let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
-                let key = BatchKey::new(
-                    BatchKind::Blend,
-                    BlendMode::PremultipliedAlpha,
-                    BatchTextures::no_texture(),
-                );
-                let src_task_address = render_tasks.get_task_address(src_id);
+// A free function that adds a primitive to a batch.
+// It can recursively call itself in some situations, for
+// example if it encounters a picture where the items
+// in that picture are being drawn into the same target.
+fn add_to_batch(
+    clip_id: ClipScrollNodeIndex,
+    scroll_id: ClipScrollNodeIndex,
+    prim_index: PrimitiveIndex,
+    batch_list: &mut BatchList,
+    ctx: &RenderTargetContext,
+    gpu_cache: &mut GpuCache,
+    render_tasks: &RenderTaskTree,
+    task_id: RenderTaskId,
+    task_address: RenderTaskAddress,
+    deferred_resolves: &mut Vec<DeferredResolve>,
+    glyph_fetch_buffer: &mut Vec<GlyphFetchResult>,
+    splitter: &mut BspSplitter<f64, WorldPixel>,
+) {
+    let z = prim_index.0 as i32;
+    let prim_metadata = ctx.prim_store.get_metadata(prim_index);
+    let scroll_node = &ctx.node_data[scroll_id.0 as usize];
+    // TODO(gw): Calculating this for every primitive is a bit
+    //           wasteful. We should probably cache this in
+    //           the scroll node...
+    let transform_kind = scroll_node.transform.transform_kind();
+    let item_bounding_rect = prim_metadata.screen_rect.as_ref().unwrap();
+    let prim_cache_address = gpu_cache.get_address(&prim_metadata.gpu_location);
+    let no_textures = BatchTextures::no_texture();
+    let clip_task_address = prim_metadata
+        .clip_task_id
+        .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
+    let base_instance = SimplePrimitiveInstance::new(
+        prim_cache_address,
+        task_address,
+        clip_task_address,
+        clip_id,
+        scroll_id,
+        z,
+    );
 
-                let (filter_mode, amount) = match filter {
-                    FilterOp::Blur(..) => (0, 0.0),
-                    FilterOp::Contrast(amount) => (1, amount),
-                    FilterOp::Grayscale(amount) => (2, amount),
-                    FilterOp::HueRotate(angle) => (3, angle),
-                    FilterOp::Invert(amount) => (4, amount),
-                    FilterOp::Saturate(amount) => (5, amount),
-                    FilterOp::Sepia(amount) => (6, amount),
-                    FilterOp::Brightness(amount) => (7, amount),
-                    FilterOp::Opacity(PropertyBinding::Value(amount)) => (8, amount),
-                    FilterOp::Opacity(_) => unreachable!(),
-                };
+    let blend_mode = ctx.prim_store.get_blend_mode(prim_metadata, transform_kind);
 
-                let amount = (amount * 65535.0).round() as i32;
-                let batch = batch_list.get_suitable_batch(key, &stacking_context.screen_bounds);
+    match prim_metadata.prim_kind {
+        PrimitiveKind::Brush => {
+            panic!("BUG: brush type not expected in an alpha task (yet)");
+        }
+        PrimitiveKind::Border => {
+            let border_cpu =
+                &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
+            // TODO(gw): Select correct blend mode for edges and corners!!
+            let corner_kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::BorderCorner,
+            );
+            let corner_key = BatchKey::new(corner_kind, blend_mode, no_textures);
+            let edge_kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::BorderEdge,
+            );
+            let edge_key = BatchKey::new(edge_kind, blend_mode, no_textures);
 
-                let instance = CompositePrimitiveInstance::new(
-                    task_address,
-                    src_task_address,
-                    RenderTaskAddress(0),
-                    filter_mode,
-                    amount,
-                    z,
-                    0,
-                    0,
-                );
-
-                batch.push(PrimitiveInstance::from(instance));
+            // Work around borrow ck on borrowing batch_list twice.
+            {
+                let batch =
+                    batch_list.get_suitable_batch(corner_key, item_bounding_rect);
+                for (i, instance_kind) in border_cpu.corner_instances.iter().enumerate()
+                {
+                    let sub_index = i as i32;
+                    match *instance_kind {
+                        BorderCornerInstance::None => {}
+                        BorderCornerInstance::Single => {
+                            batch.push(base_instance.build(
+                                sub_index,
+                                BorderCornerSide::Both as i32,
+                                0,
+                            ));
+                        }
+                        BorderCornerInstance::Double => {
+                            batch.push(base_instance.build(
+                                sub_index,
+                                BorderCornerSide::First as i32,
+                                0,
+                            ));
+                            batch.push(base_instance.build(
+                                sub_index,
+                                BorderCornerSide::Second as i32,
+                                0,
+                            ));
+                        }
+                    }
+                }
             }
-            AlphaRenderItem::HardwareComposite(
-                stacking_context_index,
-                src_id,
-                composite_op,
-                screen_origin,
-                z,
-                dest_rect,
-            ) => {
-                let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
-                let src_task_address = render_tasks.get_task_address(src_id);
-                let key = BatchKey::new(
-                    BatchKind::HardwareComposite,
-                    composite_op.to_blend_mode(),
-                    BatchTextures::no_texture(),
-                );
-                let batch = batch_list.get_suitable_batch(key, &stacking_context.screen_bounds);
-                let dest_rect = if dest_rect.width > 0 && dest_rect.height > 0 {
-                    dest_rect
-                } else {
-                    render_tasks.get(src_id).get_dynamic_size()
-                };
 
-                let instance = CompositePrimitiveInstance::new(
-                    task_address,
-                    src_task_address,
-                    RenderTaskAddress(0),
-                    screen_origin.x,
-                    screen_origin.y,
-                    z,
-                    dest_rect.width,
-                    dest_rect.height,
-                );
+            let batch = batch_list.get_suitable_batch(edge_key, item_bounding_rect);
+            for border_segment in 0 .. 4 {
+                batch.push(base_instance.build(border_segment, 0, 0));
+            }
+        }
+        PrimitiveKind::Rectangle => {
+            let needs_clipping = prim_metadata.clip_task_id.is_some();
+            let kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::Rectangle(needs_clipping),
+            );
+            let key = BatchKey::new(kind, blend_mode, no_textures);
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            batch.push(base_instance.build(0, 0, 0));
+        }
+        PrimitiveKind::Line => {
+            let kind =
+                BatchKind::Transformable(transform_kind, TransformBatchKind::Line);
+            let key = BatchKey::new(kind, blend_mode, no_textures);
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            batch.push(base_instance.build(0, 0, 0));
+        }
+        PrimitiveKind::Image => {
+            let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
 
-                batch.push(PrimitiveInstance::from(instance));
+            let (color_texture_id, uv_address) = resolve_image(
+                image_cpu.image_key,
+                image_cpu.image_rendering,
+                image_cpu.tile_offset,
+                ctx.resource_cache,
+                gpu_cache,
+                deferred_resolves,
+            );
+
+            if color_texture_id == SourceTexture::Invalid {
+                warn!("Warnings: skip a PrimitiveKind::Image at {:?}.\n", item_bounding_rect);
+                return;
             }
-            AlphaRenderItem::Composite(stacking_context_index, source_id, backdrop_id, mode, z) => {
-                let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
-                let key = BatchKey::new(
-                    BatchKind::Composite {
-                        task_id,
-                        source_id,
-                        backdrop_id,
-                    },
-                    BlendMode::PremultipliedAlpha,
-                    BatchTextures::no_texture(),
-                );
-                let batch = batch_list.get_suitable_batch(key, &stacking_context.screen_bounds);
-                let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
-                let source_task_address = render_tasks.get_task_address(source_id);
+
+            let batch_kind = match color_texture_id {
+                SourceTexture::External(ext_image) => {
+                    match ext_image.image_type {
+                        ExternalImageType::Texture2DHandle => {
+                            TransformBatchKind::Image(ImageBufferKind::Texture2D)
+                        }
+                        ExternalImageType::Texture2DArrayHandle => {
+                            TransformBatchKind::Image(ImageBufferKind::Texture2DArray)
+                        }
+                        ExternalImageType::TextureRectHandle => {
+                            TransformBatchKind::Image(ImageBufferKind::TextureRect)
+                        }
+                        ExternalImageType::TextureExternalHandle => {
+                            TransformBatchKind::Image(ImageBufferKind::TextureExternal)
+                        }
+                        ExternalImageType::ExternalBuffer => {
+                            // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
+                            // It should go through the non-external case.
+                            panic!(
+                                "Non-texture handle type should be handled in other way"
+                            );
+                        }
+                    }
+                }
+                _ => TransformBatchKind::Image(ImageBufferKind::Texture2DArray),
+            };
+
+            let textures = BatchTextures {
+                colors: [
+                    color_texture_id,
+                    SourceTexture::Invalid,
+                    SourceTexture::Invalid,
+                ],
+            };
+
+            let key = BatchKey::new(
+                BatchKind::Transformable(transform_kind, batch_kind),
+                blend_mode,
+                textures,
+            );
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
+        }
+        PrimitiveKind::TextRun => {
+            let text_cpu =
+                &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
+
+            let font = text_cpu.get_font(ctx.device_pixel_ratio);
+
+            ctx.resource_cache.fetch_glyphs(
+                font,
+                &text_cpu.glyph_keys,
+                glyph_fetch_buffer,
+                gpu_cache,
+                |texture_id, glyph_format, glyphs| {
+                    debug_assert_ne!(texture_id, SourceTexture::Invalid);
 
-                let instance = CompositePrimitiveInstance::new(
-                    task_address,
-                    source_task_address,
-                    backdrop_task_address,
-                    mode as u32 as i32,
-                    0,
-                    z,
-                    0,
-                    0,
-                );
+                    let textures = BatchTextures {
+                        colors: [
+                            texture_id,
+                            SourceTexture::Invalid,
+                            SourceTexture::Invalid,
+                        ],
+                    };
+
+                    let kind = BatchKind::Transformable(
+                        transform_kind,
+                        TransformBatchKind::TextRun(glyph_format),
+                    );
+
+                    let key = BatchKey::new(kind, blend_mode, textures);
+                    let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+
+                    for glyph in glyphs {
+                        batch.push(base_instance.build(
+                            glyph.index_in_text_run,
+                            glyph.uv_rect_address.as_int(),
+                            0,
+                        ));
+                    }
+                },
+            );
+        }
+        PrimitiveKind::Picture => {
+            let picture =
+                &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
+
+            match picture.render_task_id {
+                Some(cache_task_id) => {
+                    let cache_task_address = render_tasks.get_task_address(cache_task_id);
+                    let textures = BatchTextures::render_target_cache();
 
-                batch.push(PrimitiveInstance::from(instance));
-            }
-            AlphaRenderItem::Primitive(clip_id, scroll_id, prim_index, z) => {
-                let prim_metadata = ctx.prim_store.get_metadata(prim_index);
-                let scroll_node = &ctx.node_data[scroll_id.0 as usize];
-                // TODO(gw): Calculating this for every primitive is a bit
-                //           wasteful. We should probably cache this in
-                //           the scroll node...
-                let transform_kind = scroll_node.transform.transform_kind();
-                let item_bounding_rect = prim_metadata.screen_rect.as_ref().unwrap();
-                let prim_cache_address = gpu_cache.get_address(&prim_metadata.gpu_location);
-                let no_textures = BatchTextures::no_texture();
-                let clip_task_address = prim_metadata
-                    .clip_task_id
-                    .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
-                let base_instance = SimplePrimitiveInstance::new(
-                    prim_cache_address,
-                    task_address,
-                    clip_task_address,
-                    clip_id,
-                    scroll_id,
-                    z,
-                );
+                    match picture.kind {
+                        PictureKind::TextShadow { .. } => {
+                            let kind = BatchKind::Brush(
+                                BrushBatchKind::Image(picture.target_kind()),
+                            );
+                            let key = BatchKey::new(kind, blend_mode, textures);
+                            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+
+                            let instance = BrushInstance {
+                                picture_address: task_address,
+                                prim_address: prim_cache_address,
+                                clip_id,
+                                scroll_id,
+                                clip_task_address,
+                                z,
+                                flags: 0,
+                                user_data0: cache_task_address.0 as i32,
+                                user_data1: BrushImageKind::Simple as i32,
+                            };
+                            batch.push(PrimitiveInstance::from(instance));
+                        }
+                        PictureKind::BoxShadow { radii_kind, .. } => {
+                            let kind = BatchKind::Brush(
+                                BrushBatchKind::Image(picture.target_kind()),
+                            );
+                            let key = BatchKey::new(kind, blend_mode, textures);
+                            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+
+                            let image_kind = match radii_kind {
+                                BorderRadiusKind::Uniform => {
+                                    BrushImageKind::Mirror
+                                }
+                                BorderRadiusKind::NonUniform => {
+                                    BrushImageKind::NinePatch
+                                }
+                            };
 
-                let blend_mode = ctx.prim_store.get_blend_mode(prim_metadata, transform_kind);
+                            let instance = BrushInstance {
+                                picture_address: task_address,
+                                prim_address: prim_cache_address,
+                                clip_id,
+                                scroll_id,
+                                clip_task_address,
+                                z,
+                                flags: 0,
+                                user_data0: cache_task_address.0 as i32,
+                                user_data1: image_kind as i32,
+                            };
+                            batch.push(PrimitiveInstance::from(instance));
+                        }
+                        PictureKind::Image {
+                            composite_mode,
+                            readback_render_task_id,
+                            is_in_3d_context,
+                            reference_frame_id,
+                            real_local_rect,
+                            ..
+                        } => {
+                            // If this picture is participating in a 3D rendering context,
+                            // then don't add it to any batches here. Instead, create a polygon
+                            // for it and add it to the current plane splitter.
+                            if is_in_3d_context {
+                                // Push into parent plane splitter.
+
+                                let real_xf = &ctx.clip_scroll_tree.nodes[&reference_frame_id].world_content_transform;
+
+                                let polygon = make_polygon(
+                                    real_local_rect,
+                                    &real_xf,
+                                    prim_index.0,
+                                );
+
+                                splitter.add(polygon);
+
+                                return;
+                            }
 
-                match prim_metadata.prim_kind {
-                    PrimitiveKind::Brush => {
-                        panic!("BUG: brush type not expected in an alpha task (yet)");
-                    }
-                    PrimitiveKind::Border => {
-                        let border_cpu =
-                            &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
-                        // TODO(gw): Select correct blend mode for edges and corners!!
-                        let corner_kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::BorderCorner,
-                        );
-                        let corner_key = BatchKey::new(corner_kind, blend_mode, no_textures);
-                        let edge_kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::BorderEdge,
-                        );
-                        let edge_key = BatchKey::new(edge_kind, blend_mode, no_textures);
+                            // Depending on the composite mode of the picture, we generate the
+                            // old style Composite primitive instances. In the future, we'll
+                            // remove these and pass them through the brush batching pipeline.
+                            // This will allow us to unify some of the shaders, apply clip masks
+                            // when compositing pictures, and also correctly apply pixel snapping
+                            // to picture compositing operations.
+                            let source_id = picture.render_task_id.expect("no source!?");
+
+                            match composite_mode.expect("bug: only composites here") {
+                                PictureCompositeMode::Filter(filter) => {
+                                    match filter {
+                                        FilterOp::Blur(..) => {
+                                            let src_task_address = render_tasks.get_task_address(source_id);
+                                            let key = BatchKey::new(
+                                                BatchKind::HardwareComposite,
+                                                BlendMode::PremultipliedAlpha,
+                                                BatchTextures::no_texture(),
+                                            );
+                                            let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                            let instance = CompositePrimitiveInstance::new(
+                                                task_address,
+                                                src_task_address,
+                                                RenderTaskAddress(0),
+                                                item_bounding_rect.origin.x,
+                                                item_bounding_rect.origin.y,
+                                                z,
+                                                item_bounding_rect.size.width,
+                                                item_bounding_rect.size.height,
+                                            );
+
+                                            batch.push(PrimitiveInstance::from(instance));
+                                        }
+                                        _ => {
+                                            let key = BatchKey::new(
+                                                BatchKind::Blend,
+                                                BlendMode::PremultipliedAlpha,
+                                                BatchTextures::no_texture(),
+                                            );
+                                            let src_task_address = render_tasks.get_task_address(source_id);
 
-                        // Work around borrow ck on borrowing batch_list twice.
-                        {
-                            let batch =
-                                batch_list.get_suitable_batch(corner_key, item_bounding_rect);
-                            for (i, instance_kind) in border_cpu.corner_instances.iter().enumerate()
-                            {
-                                let sub_index = i as i32;
-                                match *instance_kind {
-                                    BorderCornerInstance::None => {}
-                                    BorderCornerInstance::Single => {
-                                        batch.push(base_instance.build(
-                                            sub_index,
-                                            BorderCornerSide::Both as i32,
-                                            0,
-                                        ));
+                                            let (filter_mode, amount) = match filter {
+                                                FilterOp::Blur(..) => (0, 0.0),
+                                                FilterOp::Contrast(amount) => (1, amount),
+                                                FilterOp::Grayscale(amount) => (2, amount),
+                                                FilterOp::HueRotate(angle) => (3, angle),
+                                                FilterOp::Invert(amount) => (4, amount),
+                                                FilterOp::Saturate(amount) => (5, amount),
+                                                FilterOp::Sepia(amount) => (6, amount),
+                                                FilterOp::Brightness(amount) => (7, amount),
+                                                FilterOp::Opacity(_, amount) => (8, amount),
+                                            };
+
+                                            let amount = (amount * 65535.0).round() as i32;
+                                            let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
+
+                                            let instance = CompositePrimitiveInstance::new(
+                                                task_address,
+                                                src_task_address,
+                                                RenderTaskAddress(0),
+                                                filter_mode,
+                                                amount,
+                                                z,
+                                                0,
+                                                0,
+                                            );
+
+                                            batch.push(PrimitiveInstance::from(instance));
+                                        }
                                     }
-                                    BorderCornerInstance::Double => {
-                                        batch.push(base_instance.build(
-                                            sub_index,
-                                            BorderCornerSide::First as i32,
-                                            0,
-                                        ));
-                                        batch.push(base_instance.build(
-                                            sub_index,
-                                            BorderCornerSide::Second as i32,
-                                            0,
-                                        ));
-                                    }
+                                }
+                                PictureCompositeMode::MixBlend(mode) => {
+                                    let backdrop_id = readback_render_task_id.expect("no backdrop!?");
+
+                                    let key = BatchKey::new(
+                                        BatchKind::Composite {
+                                            task_id,
+                                            source_id,
+                                            backdrop_id,
+                                        },
+                                        BlendMode::PremultipliedAlpha,
+                                        BatchTextures::no_texture(),
+                                    );
+                                    let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                    let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
+                                    let source_task_address = render_tasks.get_task_address(source_id);
+
+                                    let instance = CompositePrimitiveInstance::new(
+                                        task_address,
+                                        source_task_address,
+                                        backdrop_task_address,
+                                        mode as u32 as i32,
+                                        0,
+                                        z,
+                                        0,
+                                        0,
+                                    );
+
+                                    batch.push(PrimitiveInstance::from(instance));
+                                }
+                                PictureCompositeMode::Blit => {
+                                    let src_task_address = render_tasks.get_task_address(source_id);
+                                    let key = BatchKey::new(
+                                        BatchKind::HardwareComposite,
+                                        BlendMode::PremultipliedAlpha,
+                                        BatchTextures::no_texture(),
+                                    );
+                                    let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                    let instance = CompositePrimitiveInstance::new(
+                                        task_address,
+                                        src_task_address,
+                                        RenderTaskAddress(0),
+                                        item_bounding_rect.origin.x,
+                                        item_bounding_rect.origin.y,
+                                        z,
+                                        item_bounding_rect.size.width,
+                                        item_bounding_rect.size.height,
+                                    );
+
+                                    batch.push(PrimitiveInstance::from(instance));
                                 }
                             }
                         }
+                    }
+                }
+                None => {
+                    // If this picture is being drawn into an existing target (i.e. with
+                    // no composition operation), recurse and add to the current batch list.
+                    picture.add_to_batch(
+                        task_id,
+                        ctx,
+                        gpu_cache,
+                        render_tasks,
+                        deferred_resolves,
+                        batch_list,
+                        glyph_fetch_buffer,
+                    );
+                }
+            }
+        }
+        PrimitiveKind::AlignedGradient => {
+            let gradient_cpu =
+                &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
+            let kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::AlignedGradient,
+            );
+            let key = BatchKey::new(kind, blend_mode, no_textures);
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            for part_index in 0 .. (gradient_cpu.stops_count - 1) {
+                batch.push(base_instance.build(part_index as i32, 0, 0));
+            }
+        }
+        PrimitiveKind::AngleGradient => {
+            let kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::AngleGradient,
+            );
+            let key = BatchKey::new(kind, blend_mode, no_textures);
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            batch.push(base_instance.build(0, 0, 0));
+        }
+        PrimitiveKind::RadialGradient => {
+            let kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::RadialGradient,
+            );
+            let key = BatchKey::new(kind, blend_mode, no_textures);
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            batch.push(base_instance.build(0, 0, 0));
+        }
+        PrimitiveKind::YuvImage => {
+            let mut textures = BatchTextures::no_texture();
+            let mut uv_rect_addresses = [0; 3];
+            let image_yuv_cpu =
+                &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
 
-                        let batch = batch_list.get_suitable_batch(edge_key, item_bounding_rect);
-                        for border_segment in 0 .. 4 {
-                            batch.push(base_instance.build(border_segment, 0, 0));
+            //yuv channel
+            let channel_count = image_yuv_cpu.format.get_plane_num();
+            debug_assert!(channel_count <= 3);
+            for channel in 0 .. channel_count {
+                let image_key = image_yuv_cpu.yuv_key[channel];
+
+                let (texture, address) = resolve_image(
+                    image_key,
+                    image_yuv_cpu.image_rendering,
+                    None,
+                    ctx.resource_cache,
+                    gpu_cache,
+                    deferred_resolves,
+                );
+
+                if texture == SourceTexture::Invalid {
+                    warn!("Warnings: skip a PrimitiveKind::YuvImage at {:?}.\n", item_bounding_rect);
+                    return;
+                }
+
+                textures.colors[channel] = texture;
+                uv_rect_addresses[channel] = address.as_int(gpu_cache);
+            }
+
+            let get_buffer_kind = |texture: SourceTexture| {
+                match texture {
+                    SourceTexture::External(ext_image) => {
+                        match ext_image.image_type {
+                            ExternalImageType::Texture2DHandle => {
+                                ImageBufferKind::Texture2D
+                            }
+                            ExternalImageType::Texture2DArrayHandle => {
+                                ImageBufferKind::Texture2DArray
+                            }
+                            ExternalImageType::TextureRectHandle => {
+                                ImageBufferKind::TextureRect
+                            }
+                            ExternalImageType::TextureExternalHandle => {
+                                ImageBufferKind::TextureExternal
+                            }
+                            ExternalImageType::ExternalBuffer => {
+                                // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
+                                // It should go through the non-external case.
+                                panic!("Unexpected non-texture handle type");
+                            }
                         }
                     }
-                    PrimitiveKind::Rectangle => {
-                        let needs_clipping = prim_metadata.clip_task_id.is_some();
-                        let kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::Rectangle(needs_clipping),
-                        );
-                        let key = BatchKey::new(kind, blend_mode, no_textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        batch.push(base_instance.build(0, 0, 0));
-                    }
-                    PrimitiveKind::Line => {
-                        let kind =
-                            BatchKind::Transformable(transform_kind, TransformBatchKind::Line);
-                        let key = BatchKey::new(kind, blend_mode, no_textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        batch.push(base_instance.build(0, 0, 0));
-                    }
-                    PrimitiveKind::Image => {
-                        let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
+                    _ => ImageBufferKind::Texture2DArray,
+                }
+            };
 
-                        let (color_texture_id, uv_address) = resolve_image(
-                            image_cpu.image_key,
-                            image_cpu.image_rendering,
-                            image_cpu.tile_offset,
-                            ctx.resource_cache,
-                            gpu_cache,
-                            deferred_resolves,
-                        );
+            // All yuv textures should be the same type.
+            let buffer_kind = get_buffer_kind(textures.colors[0]);
+            assert!(
+                textures.colors[1 .. image_yuv_cpu.format.get_plane_num()]
+                    .iter()
+                    .all(|&tid| buffer_kind == get_buffer_kind(tid))
+            );
 
-                        if color_texture_id == SourceTexture::Invalid {
-                            warn!("Warnings: skip a PrimitiveKind::Image at {:?}.\n", item_bounding_rect);
-                            return;
-                        }
+            let kind = BatchKind::Transformable(
+                transform_kind,
+                TransformBatchKind::YuvImage(
+                    buffer_kind,
+                    image_yuv_cpu.format,
+                    image_yuv_cpu.color_space,
+                ),
+            );
+            let key = BatchKey::new(kind, blend_mode, textures);
+            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
 
-                        let batch_kind = match color_texture_id {
-                            SourceTexture::External(ext_image) => {
-                                match ext_image.image_type {
-                                    ExternalImageType::Texture2DHandle => {
-                                        TransformBatchKind::Image(ImageBufferKind::Texture2D)
-                                    }
-                                    ExternalImageType::Texture2DArrayHandle => {
-                                        TransformBatchKind::Image(ImageBufferKind::Texture2DArray)
-                                    }
-                                    ExternalImageType::TextureRectHandle => {
-                                        TransformBatchKind::Image(ImageBufferKind::TextureRect)
-                                    }
-                                    ExternalImageType::TextureExternalHandle => {
-                                        TransformBatchKind::Image(ImageBufferKind::TextureExternal)
-                                    }
-                                    ExternalImageType::ExternalBuffer => {
-                                        // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
-                                        // It should go through the non-external case.
-                                        panic!(
-                                            "Non-texture handle type should be handled in other way"
-                                        );
-                                    }
-                                }
-                            }
-                            _ => TransformBatchKind::Image(ImageBufferKind::Texture2DArray),
-                        };
-
-                        let textures = BatchTextures {
-                            colors: [
-                                color_texture_id,
-                                SourceTexture::Invalid,
-                                SourceTexture::Invalid,
-                            ],
-                        };
+            batch.push(base_instance.build(
+                uv_rect_addresses[0],
+                uv_rect_addresses[1],
+                uv_rect_addresses[2],
+            ));
+        }
+    }
+}
 
-                        let key = BatchKey::new(
-                            BatchKind::Transformable(transform_kind, batch_kind),
-                            blend_mode,
-                            textures,
-                        );
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
-                    }
-                    PrimitiveKind::TextRun => {
-                        let text_cpu =
-                            &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
-
-                        let font = text_cpu.get_font(ctx.device_pixel_ratio);
+impl PicturePrimitive {
+    fn add_to_batch(
+        &self,
+        task_id: RenderTaskId,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskTree,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+        batch_list: &mut BatchList,
+        glyph_fetch_buffer: &mut Vec<GlyphFetchResult>,
+    ) {
+        let task_address = render_tasks.get_task_address(task_id);
 
-                        ctx.resource_cache.fetch_glyphs(
-                            font,
-                            &text_cpu.glyph_keys,
-                            glyph_fetch_buffer,
-                            gpu_cache,
-                            |texture_id, glyph_format, glyphs| {
-                                debug_assert_ne!(texture_id, SourceTexture::Invalid);
+        // Even though most of the time a splitter isn't used or needed,
+        // they are cheap to construct so we will always pass one down.
+        let mut splitter = BspSplitter::new();
 
-                                let textures = BatchTextures {
-                                    colors: [
-                                        texture_id,
-                                        SourceTexture::Invalid,
-                                        SourceTexture::Invalid,
-                                    ],
-                                };
-
-                                let kind = BatchKind::Transformable(
-                                    transform_kind,
-                                    TransformBatchKind::TextRun(glyph_format),
-                                );
-
-                                let key = BatchKey::new(kind, blend_mode, textures);
-                                let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+        // Add each run in this picture to the batch.
+        for run in &self.runs {
+            let clip_node = &ctx.clip_scroll_tree.nodes[&run.clip_and_scroll.clip_node_id()];
+            let clip_id = clip_node.node_data_index;
 
-                                for glyph in glyphs {
-                                    batch.push(base_instance.build(
-                                        glyph.index_in_text_run,
-                                        glyph.uv_rect_address.as_int(),
-                                        0,
-                                    ));
-                                }
-                            },
-                        );
-                    }
-                    PrimitiveKind::Picture => {
-                        let picture =
-                            &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
-                        let cache_task_id = picture.render_task_id.expect("no render task!");
-                        let cache_task_address = render_tasks.get_task_address(cache_task_id);
-                        let textures = BatchTextures::render_target_cache();
-                        let kind = BatchKind::Brush(
-                            BrushBatchKind::Image(picture.target_kind()),
-                        );
-                        let key = BatchKey::new(kind, blend_mode, textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        let image_kind = match picture.kind {
-                            PictureKind::TextShadow { .. } => {
-                                BrushImageKind::Simple
-                            }
-                            PictureKind::BoxShadow { radii_kind, .. } => {
-                                match radii_kind {
-                                    BorderRadiusKind::Uniform => {
-                                        BrushImageKind::Mirror
-                                    }
-                                    BorderRadiusKind::NonUniform => {
-                                        BrushImageKind::NinePatch
-                                    }
-                                }
-                            }
-                        };
-                        let instance = BrushInstance {
-                            picture_address: task_address,
-                            prim_address: prim_cache_address,
-                            clip_id,
-                            scroll_id,
-                            clip_task_address,
-                            z,
-                            flags: 0,
-                            user_data0: cache_task_address.0 as i32,
-                            user_data1: image_kind as i32,
-                        };
-                        batch.push(PrimitiveInstance::from(instance));
-                    }
-                    PrimitiveKind::AlignedGradient => {
-                        let gradient_cpu =
-                            &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
-                        let kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::AlignedGradient,
-                        );
-                        let key = BatchKey::new(kind, blend_mode, no_textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        for part_index in 0 .. (gradient_cpu.stops_count - 1) {
-                            batch.push(base_instance.build(part_index as i32, 0, 0));
-                        }
-                    }
-                    PrimitiveKind::AngleGradient => {
-                        let kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::AngleGradient,
-                        );
-                        let key = BatchKey::new(kind, blend_mode, no_textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        batch.push(base_instance.build(0, 0, 0));
-                    }
-                    PrimitiveKind::RadialGradient => {
-                        let kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::RadialGradient,
-                        );
-                        let key = BatchKey::new(kind, blend_mode, no_textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        batch.push(base_instance.build(0, 0, 0));
-                    }
-                    PrimitiveKind::YuvImage => {
-                        let mut textures = BatchTextures::no_texture();
-                        let mut uv_rect_addresses = [0; 3];
-                        let image_yuv_cpu =
-                            &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
+            let scroll_node = &ctx.clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
+            let scroll_id = scroll_node.node_data_index;
 
-                        //yuv channel
-                        let channel_count = image_yuv_cpu.format.get_plane_num();
-                        debug_assert!(channel_count <= 3);
-                        for channel in 0 .. channel_count {
-                            let image_key = image_yuv_cpu.yuv_key[channel];
+            run.add_to_batch(
+                clip_id,
+                scroll_id,
+                batch_list,
+                ctx,
+                gpu_cache,
+                render_tasks,
+                task_id,
+                task_address,
+                deferred_resolves,
+                glyph_fetch_buffer,
+                &mut splitter,
+            );
+        }
 
-                            let (texture, address) = resolve_image(
-                                image_key,
-                                image_yuv_cpu.image_rendering,
-                                None,
-                                ctx.resource_cache,
-                                gpu_cache,
-                                deferred_resolves,
-                            );
-
-                            if texture == SourceTexture::Invalid {
-                                warn!("Warnings: skip a PrimitiveKind::YuvImage at {:?}.\n", item_bounding_rect);
-                                return;
-                            }
-
-                            textures.colors[channel] = texture;
-                            uv_rect_addresses[channel] = address.as_int(gpu_cache);
-                        }
-
-                        let get_buffer_kind = |texture: SourceTexture| {
-                            match texture {
-                                SourceTexture::External(ext_image) => {
-                                    match ext_image.image_type {
-                                        ExternalImageType::Texture2DHandle => {
-                                            ImageBufferKind::Texture2D
-                                        }
-                                        ExternalImageType::Texture2DArrayHandle => {
-                                            ImageBufferKind::Texture2DArray
-                                        }
-                                        ExternalImageType::TextureRectHandle => {
-                                            ImageBufferKind::TextureRect
-                                        }
-                                        ExternalImageType::TextureExternalHandle => {
-                                            ImageBufferKind::TextureExternal
-                                        }
-                                        ExternalImageType::ExternalBuffer => {
-                                            // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
-                                            // It should go through the non-external case.
-                                            panic!("Unexpected non-texture handle type");
-                                        }
-                                    }
-                                }
-                                _ => ImageBufferKind::Texture2DArray,
-                            }
-                        };
+        // Flush the accumulated plane splits onto the task tree.
+        // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
+        for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
+            let prim_index = PrimitiveIndex(poly.anchor);
+            debug!("process sorted poly {:?} {:?}", prim_index, poly.points);
+            let pp = &poly.points;
+            let gpu_blocks = [
+                [pp[0].x as f32, pp[0].y as f32, pp[0].z as f32, pp[1].x as f32].into(),
+                [pp[1].y as f32, pp[1].z as f32, pp[2].x as f32, pp[2].y as f32].into(),
+                [pp[2].z as f32, pp[3].x as f32, pp[3].y as f32, pp[3].z as f32].into(),
+            ];
+            let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
+            let key = BatchKey::new(
+                BatchKind::SplitComposite,
+                BlendMode::PremultipliedAlpha,
+                BatchTextures::no_texture(),
+            );
+            let pic_metadata = &ctx.prim_store.cpu_metadata[prim_index.0];
+            let pic = &ctx.prim_store.cpu_pictures[pic_metadata.cpu_prim_index.0];
+            let batch = batch_list.get_suitable_batch(key, pic_metadata.screen_rect.as_ref().expect("bug"));
+            let source_task_address = render_tasks.get_task_address(pic.render_task_id.expect("bug"));
+            let gpu_address = gpu_handle.as_int(gpu_cache);
 
-                        // All yuv textures should be the same type.
-                        let buffer_kind = get_buffer_kind(textures.colors[0]);
-                        assert!(
-                            textures.colors[1 .. image_yuv_cpu.format.get_plane_num()]
-                                .iter()
-                                .all(|&tid| buffer_kind == get_buffer_kind(tid))
-                        );
-
-                        let kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::YuvImage(
-                                buffer_kind,
-                                image_yuv_cpu.format,
-                                image_yuv_cpu.color_space,
-                            ),
-                        );
-                        let key = BatchKey::new(kind, blend_mode, textures);
-                        let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+            let instance = CompositePrimitiveInstance::new(
+                task_address,
+                source_task_address,
+                RenderTaskAddress(0),
+                gpu_address,
+                0,
+                prim_index.0 as i32,
+                0,
+                0,
+            );
 
-                        batch.push(base_instance.build(
-                            uv_rect_addresses[0],
-                            uv_rect_addresses[1],
-                            uv_rect_addresses[2],
-                        ));
-                    }
-                }
-            }
-            AlphaRenderItem::SplitComposite(sc_index, task_id, gpu_handle, z) => {
-                let key = BatchKey::new(
-                    BatchKind::SplitComposite,
-                    BlendMode::PremultipliedAlpha,
-                    BatchTextures::no_texture(),
-                );
-                let stacking_context = &ctx.stacking_context_store[sc_index.0];
-                let batch = batch_list.get_suitable_batch(key, &stacking_context.screen_bounds);
-                let source_task_address = render_tasks.get_task_address(task_id);
-                let gpu_address = gpu_handle.as_int(gpu_cache);
-
-                let instance = CompositePrimitiveInstance::new(
-                    task_address,
-                    source_task_address,
-                    RenderTaskAddress(0),
-                    gpu_address,
-                    0,
-                    z,
-                    0,
-                    0,
-                );
-
-                batch.push(PrimitiveInstance::from(instance));
-            }
+            batch.push(PrimitiveInstance::from(instance));
         }
     }
 }
 
 impl AlphaBatcher {
-    fn new() -> AlphaBatcher {
+    fn new(screen_size: DeviceIntSize) -> AlphaBatcher {
         AlphaBatcher {
             tasks: Vec::new(),
-            batch_list: BatchList::new(),
+            batch_list: BatchList::new(screen_size),
             glyph_fetch_buffer: Vec::new(),
         }
     }
 
     fn add_task(&mut self, task_id: RenderTaskId) {
         self.tasks.push(task_id);
     }
 
@@ -806,30 +1017,26 @@ impl AlphaBatcher {
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
         for task_id in &self.tasks {
             let task_id = *task_id;
             let task = render_tasks.get(task_id).as_alpha_batch();
-            let task_address = render_tasks.get_task_address(task_id);
-
-            for item in &task.items {
-                item.add_to_batch(
-                    &mut self.batch_list,
-                    ctx,
-                    gpu_cache,
-                    render_tasks,
-                    task_id,
-                    task_address,
-                    deferred_resolves,
-                    &mut self.glyph_fetch_buffer,
-                );
-            }
+            let pic = &ctx.prim_store.cpu_pictures[ctx.prim_store.cpu_metadata[task.prim_index.0].cpu_prim_index.0];
+            pic.add_to_batch(
+                task_id,
+                ctx,
+                gpu_cache,
+                render_tasks,
+                deferred_resolves,
+                &mut self.batch_list,
+                &mut self.glyph_fetch_buffer
+            );
         }
 
         self.batch_list.finalize();
     }
 
     pub fn is_empty(&self) -> bool {
         self.batch_list.opaque_batch_list.batches.is_empty() &&
             self.batch_list.alpha_batch_list.batches.is_empty()
@@ -957,20 +1164,20 @@ impl ClipBatcher {
                 }
             }
         }
     }
 }
 
 pub struct RenderTargetContext<'a> {
     pub device_pixel_ratio: f32,
-    pub stacking_context_store: &'a [StackingContext],
     pub prim_store: &'a PrimitiveStore,
     pub resource_cache: &'a ResourceCache,
     pub node_data: &'a [ClipScrollNodeData],
+    pub clip_scroll_tree: &'a ClipScrollTree,
 }
 
 struct TextureAllocator {
     // TODO(gw): Replace this with a simpler allocator for
     // render target allocation - this use case doesn't need
     // to deal with coalescing etc that the general texture
     // cache allocator requires.
     allocator: GuillotineAllocator,
@@ -1002,17 +1209,20 @@ impl TextureAllocator {
             self.used_rect = rect.union(&self.used_rect);
         }
 
         origin
     }
 }
 
 pub trait RenderTarget {
-    fn new(size: Option<DeviceUintSize>) -> Self;
+    fn new(
+        size: Option<DeviceUintSize>,
+        screen_size: DeviceIntSize,
+    ) -> Self;
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
     fn build(
         &mut self,
         _ctx: &RenderTargetContext,
         _gpu_cache: &mut GpuCache,
         _render_tasks: &mut RenderTaskTree,
         _deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
@@ -1030,27 +1240,34 @@ pub trait RenderTarget {
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTargetKind {
     Color, // RGBA32
     Alpha, // R8
 }
 
 pub struct RenderTargetList<T> {
+    screen_size: DeviceIntSize,
     pub targets: Vec<T>,
 }
 
 impl<T: RenderTarget> RenderTargetList<T> {
-    fn new(create_initial_target: bool) -> RenderTargetList<T> {
+    fn new(
+        create_initial_target: bool,
+        screen_size: DeviceIntSize
+    ) -> RenderTargetList<T> {
         let mut targets = Vec::new();
         if create_initial_target {
-            targets.push(T::new(None));
+            targets.push(T::new(None, screen_size));
         }
 
-        RenderTargetList { targets }
+        RenderTargetList {
+            targets,
+            screen_size,
+        }
     }
 
     pub fn target_count(&self) -> usize {
         self.targets.len()
     }
 
     fn build(
         &mut self,
@@ -1088,17 +1305,17 @@ impl<T: RenderTarget> RenderTargetList<T
     ) -> (DeviceUintPoint, RenderTargetIndex) {
         let existing_origin = self.targets
             .last_mut()
             .and_then(|target| target.allocate(alloc_size));
 
         let origin = match existing_origin {
             Some(origin) => origin,
             None => {
-                let mut new_target = T::new(Some(target_size));
+                let mut new_target = T::new(Some(target_size), self.screen_size);
                 let origin = new_target.allocate(alloc_size).expect(&format!(
                     "Each render task must allocate <= size of one target! ({:?})",
                     alloc_size
                 ));
                 self.targets.push(new_target);
                 origin
             }
         };
@@ -1141,19 +1358,22 @@ pub struct ColorRenderTarget {
 impl RenderTarget for ColorRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
         self.allocator
             .as_mut()
             .expect("bug: calling allocate on framebuffer")
             .allocate(&size)
     }
 
-    fn new(size: Option<DeviceUintSize>) -> ColorRenderTarget {
+    fn new(
+        size: Option<DeviceUintSize>,
+        screen_size: DeviceIntSize,
+    ) -> Self {
         ColorRenderTarget {
-            alpha_batcher: AlphaBatcher::new(),
+            alpha_batcher: AlphaBatcher::new(screen_size),
             text_run_cache_prims: FastHashMap::default(),
             line_cache_prims: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             readbacks: Vec::new(),
             scalings: Vec::new(),
             allocator: size.map(|size| TextureAllocator::new(size)),
             glyph_fetch_buffer: Vec::new(),
@@ -1226,17 +1446,17 @@ impl RenderTarget for ColorRenderTarget 
             RenderTaskKind::Picture(ref task_info) => {
                 let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
                         let task_index = render_tasks.get_task_address(task_id);
 
-                        for run in &prim.prim_runs {
+                        for run in &prim.runs {
                             for i in 0 .. run.count {
                                 let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                                 let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
                                 let sub_prim_address =
                                     gpu_cache.get_address(&sub_metadata.gpu_location);
                                 let instance = SimplePrimitiveInstance::new(
                                     sub_prim_address,
@@ -1323,17 +1543,20 @@ pub struct AlphaRenderTarget {
     allocator: TextureAllocator,
 }
 
 impl RenderTarget for AlphaRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
         self.allocator.allocate(&size)
     }
 
-    fn new(size: Option<DeviceUintSize>) -> AlphaRenderTarget {
+    fn new(
+        size: Option<DeviceUintSize>,
+        _: DeviceIntSize,
+    ) -> Self {
         AlphaRenderTarget {
             clip_batcher: ClipBatcher::new(),
             brush_mask_corners: Vec::new(),
             brush_mask_rounded_rects: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             scalings: Vec::new(),
             zero_clears: Vec::new(),
@@ -1395,17 +1618,17 @@ impl RenderTarget for AlphaRenderTarget 
                 let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
                         let task_index = render_tasks.get_task_address(task_id);
 
-                        for run in &prim.prim_runs {
+                        for run in &prim.runs {
                             for i in 0 .. run.count {
                                 let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                                 let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
                                 let sub_prim_address =
                                     gpu_cache.get_address(&sub_metadata.gpu_location);
 
                                 match sub_metadata.prim_kind {
@@ -1485,21 +1708,24 @@ pub struct RenderPass {
     pub color_texture: Option<Texture>,
     pub alpha_texture: Option<Texture>,
     dynamic_tasks: FastHashMap<RenderTaskKey, DynamicTaskInfo>,
     pub max_color_target_size: DeviceUintSize,
     pub max_alpha_target_size: DeviceUintSize,
 }
 
 impl RenderPass {
-    pub fn new(is_framebuffer: bool) -> RenderPass {
+    pub fn new(
+        is_framebuffer: bool,
+        screen_size: DeviceIntSize
+    ) -> RenderPass {
         RenderPass {
             is_framebuffer,
-            color_targets: RenderTargetList::new(is_framebuffer),
-            alpha_targets: RenderTargetList::new(false),
+            color_targets: RenderTargetList::new(is_framebuffer, screen_size),
+            alpha_targets: RenderTargetList::new(false, screen_size),
             tasks: vec![],
             color_texture: None,
             alpha_texture: None,
             dynamic_tasks: FastHashMap::default(),
             max_color_target_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
             max_alpha_target_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
         }
     }
@@ -1725,121 +1951,16 @@ impl OpaquePrimitiveBatch {
     fn new(key: BatchKey) -> OpaquePrimitiveBatch {
         OpaquePrimitiveBatch {
             key,
             instances: Vec::new(),
         }
     }
 }
 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
-pub struct StackingContextIndex(pub usize);
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
-pub enum ContextIsolation {
-    /// No isolation - the content is mixed up with everything else.
-    None,
-    /// Items are isolated and drawn into a separate render target.
-    /// Child contexts are exposed.
-    Items,
-    /// All the content inside is isolated and drawn into a separate target.
-    Full,
-}
-
-#[derive(Debug)]
-pub struct StackingContext {
-    pub pipeline_id: PipelineId,
-
-    /// Offset in the parent reference frame to the origin of this stacking
-    /// context's coordinate system.
-    pub reference_frame_offset: LayerVector2D,
-
-    /// The `ClipId` of the owning reference frame.
-    pub reference_frame_id: ClipId,
-
-    /// Screen space bounding rectangle for this stacking context,
-    /// calculated based on the size and position of all its children.
-    pub screen_bounds: DeviceIntRect,
-
-    /// Local bounding rectangle of this stacking context,
-    /// computed as the union of all contained items that are not
-    /// `ContextIsolation::Items` on their own
-    pub isolated_items_bounds: LayerRect,
-
-    pub composite_ops: CompositeOps,
-
-    /// Type of the isolation of the content.
-    pub isolation: ContextIsolation,
-
-    /// Set for the root stacking context of a display list or an iframe. Used for determining
-    /// when to isolate a mix-blend-mode composite.
-    pub is_page_root: bool,
-
-    /// Set to true if this is the root stacking context for a pipeline.
-    pub is_pipeline_root: bool,
-
-    /// Whether or not this stacking context has any visible components, calculated
-    /// based on the size and position of all children and how they are clipped.
-    pub is_visible: bool,
-
-    /// Current stacking context visibility of backface.
-    pub is_backface_visible: bool,
-
-    /// Allow subpixel AA for text runs on this stacking context.
-    /// This is a temporary hack while we don't support subpixel AA
-    /// on transparent stacking contexts.
-    pub allow_subpixel_aa: bool,
-
-    /// Indicate that if any pritimive contained in this stacking context.
-    pub has_any_primitive: bool,
-
-    /// Union of all stacking context bounds of all children.
-    pub children_sc_bounds: LayerRect,
-}
-
-impl StackingContext {
-    pub fn new(
-        pipeline_id: PipelineId,
-        reference_frame_offset: LayerVector2D,
-        is_page_root: bool,
-        is_pipeline_root: bool,
-        reference_frame_id: ClipId,
-        transform_style: TransformStyle,
-        composite_ops: CompositeOps,
-        is_backface_visible: bool,
-    ) -> StackingContext {
-        let isolation = match transform_style {
-            TransformStyle::Flat => ContextIsolation::None,
-            TransformStyle::Preserve3D => ContextIsolation::Items,
-        };
-        let allow_subpixel_aa = composite_ops.count() == 0 &&
-                                isolation == ContextIsolation::None;
-        StackingContext {
-            pipeline_id,
-            reference_frame_offset,
-            reference_frame_id,
-            screen_bounds: DeviceIntRect::zero(),
-            isolated_items_bounds: LayerRect::zero(),
-            composite_ops,
-            isolation,
-            is_page_root,
-            is_pipeline_root,
-            is_visible: false,
-            is_backface_visible,
-            allow_subpixel_aa,
-            has_any_primitive: false,
-            children_sc_bounds: LayerRect::zero(),
-        }
-    }
-
-    pub fn can_contribute_to_scene(&self) -> bool {
-        !self.composite_ops.will_make_invisible()
-    }
-}
-
 #[derive(Debug, Clone, Default)]
 pub struct CompositeOps {
     // Requires only a single texture as input (e.g. most filters)
     pub filters: Vec<FilterOp>,
 
     // Requires two source textures (e.g. mix-blend-mode)
     pub mix_blend_mode: Option<MixBlendMode>,
 }
@@ -1850,25 +1971,16 @@ impl CompositeOps {
             filters,
             mix_blend_mode: mix_blend_mode,
         }
     }
 
     pub fn count(&self) -> usize {
         self.filters.len() + if self.mix_blend_mode.is_some() { 1 } else { 0 }
     }
-
-    pub fn will_make_invisible(&self) -> bool {
-        for op in &self.filters {
-            if op == &FilterOp::Opacity(PropertyBinding::Value(0.0)) {
-                return true;
-            }
-        }
-        false
-    }
 }
 
 /// A rendering-oriented representation of frame::Frame built by the render backend
 /// and presented to the renderer.
 pub struct Frame {
     pub window_size: DeviceUintSize,
     pub background_color: Option<ColorF>,
     pub device_pixel_ratio: f32,
@@ -1951,8 +2063,36 @@ impl BlurTask {
                 instances.push(BlurInstance {
                     region: *region,
                     ..instance
                 });
             }
         }
     }
 }
+
+/// Construct a polygon from stacking context boundaries.
+/// `anchor` here is an index that's going to be preserved in all the
+/// splits of the polygon.
+fn make_polygon(
+    rect: LayerRect,
+    transform: &LayerToWorldTransform,
+    anchor: usize,
+) -> Polygon<f64, WorldPixel> {
+    let mat = TypedTransform3D::row_major(
+        transform.m11 as f64,
+        transform.m12 as f64,
+        transform.m13 as f64,
+        transform.m14 as f64,
+        transform.m21 as f64,
+        transform.m22 as f64,
+        transform.m23 as f64,
+        transform.m24 as f64,
+        transform.m31 as f64,
+        transform.m32 as f64,
+        transform.m33 as f64,
+        transform.m34 as f64,
+        transform.m41 as f64,
+        transform.m42 as f64,
+        transform.m43 as f64,
+        transform.m44 as f64);
+    Polygon::from_transformed_rect(rect.cast().unwrap(), mat, anchor)
+}
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -238,29 +238,33 @@ impl fmt::Debug for DocumentMsg {
             DocumentMsg::GenerateFrame(..) => "DocumentMsg::GenerateFrame",
             DocumentMsg::EnableFrameOutput(..) => "DocumentMsg::EnableFrameOutput",
         })
     }
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub enum DebugCommand {
-    // Display the frame profiler on screen.
+    /// Display the frame profiler on screen.
     EnableProfiler(bool),
-    // Display all texture cache pages on screen.
+    /// Display all texture cache pages on screen.
     EnableTextureCacheDebug(bool),
-    // Display intermediate render targets on screen.
+    /// Display intermediate render targets on screen.
     EnableRenderTargetDebug(bool),
-    // Display alpha primitive rects.
+    /// Display alpha primitive rects.
     EnableAlphaRectsDebug(bool),
-    // Fetch current documents and display lists.
+    /// Display GPU timing results.
+    EnableGpuTimeQueries(bool),
+    /// Display GPU overdraw results.
+    EnableGpuSampleQueries(bool),
+    /// Fetch current documents and display lists.
     FetchDocuments,
-    // Fetch current passes and batches.
+    /// Fetch current passes and batches.
     FetchPasses,
-    // Fetch clip-scroll tree.
+    /// Fetch clip-scroll tree.
     FetchClipScrollTree,
 }
 
 #[derive(Clone, Deserialize, Serialize)]
 pub enum ApiMsg {
     /// Add/remove/update images and fonts.
     UpdateResources(ResourceUpdates),
     /// Gets the glyph dimensions
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -466,17 +466,17 @@ pub enum MixBlendMode {
 #[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
 pub enum FilterOp {
     Blur(f32),
     Brightness(f32),
     Contrast(f32),
     Grayscale(f32),
     HueRotate(f32),
     Invert(f32),
-    Opacity(PropertyBinding<f32>),
+    Opacity(PropertyBinding<f32>, f32),
     Saturate(f32),
     Sepia(f32),
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct IframeDisplayItem {
     pub pipeline_id: PipelineId,
 }
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-/* Generated with cbindgen:0.2.0 */
+/* Generated with cbindgen:0.2.2 */
 
 /* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen.
  * To generate this file:
  *   1. Get the latest cbindgen using `cargo install --force cbindgen`
  *      a. Alternatively, you can clone `https://github.com/rlhunt/cbindgen` and use a tagged release
  *   2. Run `rustup run nightly cbindgen toolkit/library/rust/ --crate webrender_bindings -o gfx/webrender_bindings/webrender_ffi_generated.h`
  */