Bug 1407213 - Update webrender to commit a624aa6d3b6006c510c8b14026567af4ac545d2f. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Fri, 13 Oct 2017 12:51:07 -0400
changeset 680203 f030b7d29468351cc4d2358c3f36a502f80c2a20
parent 680024 196dadb2fe500e75c6fbddcac78106648676cf10
child 680204 e8fcddcc93c46c5a34a352d6f1a72cebcc486606
push id 84421
push user kgupta@mozilla.com
push date Fri, 13 Oct 2017 17:00:08 +0000
reviewers jrmuizel
bugs 1407213
milestone 58.0a1
Bug 1407213 - Update webrender to commit a624aa6d3b6006c510c8b14026567af4ac545d2f. r?jrmuizel MozReview-Commit-ID: GlP4wiZE4OT
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/common/image_helper.rs
gfx/webrender/examples/image_resize.rs
gfx/webrender/res/ps_split_composite.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/shared.glsl
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/debug_render.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender/tests/angle_shader_validation.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-6440dff485271cdfd24a22c920cea31e01e2b164
+a624aa6d3b6006c510c8b14026567af4ac545d2f
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -9,17 +9,17 @@ build = "build.rs"
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
 debugger = ["ws", "serde_json", "serde", "serde_derive"]
 
 [dependencies]
 app_units = "0.5.6"
-bincode = "0.8"
+bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15.2"
 fxhash = "0.2.1"
 gleam = "0.4.8"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
@@ -39,15 +39,15 @@ env_logger = "0.4"
 rand = "0.3"                # for the benchmarks
 servo-glutin = "0.12"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.3", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
-gamma-lut = "0.2"
+gamma-lut = "0.2.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
 core-graphics = "0.9"
 core-text = { version = "7.0", default-features = false }
-gamma-lut = "0.2"
+gamma-lut = "0.2.1"
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/examples/common/image_helper.rs
@@ -0,0 +1,16 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use webrender::api::{ImageData, ImageDescriptor, ImageFormat};
+
+pub fn make_checkerboard(width: u32, height: u32) -> (ImageDescriptor, ImageData) {
+    let mut image_data = Vec::new();
+    for y in 0 .. height {
+        for x in 0 .. width {
+            let lum = 255 * (((x & 8) == 0) ^ ((y & 8) == 0)) as u8;
+            image_data.extend_from_slice(&[lum, lum, lum, 0xff]);
+        }
+    }
+    (ImageDescriptor::new(width, height, ImageFormat::BGRA8, true), ImageData::new(image_data))
+}
--- a/gfx/webrender/examples/image_resize.rs
+++ b/gfx/webrender/examples/image_resize.rs
@@ -3,16 +3,18 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
+#[path = "common/image_helper.rs"]
+mod image_helper;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
 use webrender::api::*;
 
 struct App {
     image_key: ImageKey,
 }
 
@@ -21,28 +23,21 @@ impl Example for App {
         &mut self,
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
         _layout_size: LayoutSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
-        let mut image_data = Vec::new();
-        for y in 0 .. 32 {
-            for x in 0 .. 32 {
-                let lum = 255 * (((x & 8) == 0) ^ ((y & 8) == 0)) as u8;
-                image_data.extend_from_slice(&[lum, lum, lum, 0xff]);
-            }
-        }
-
+        let (image_descriptor, image_data) = image_helper::make_checkerboard(32, 32);
         resources.add_image(
             self.image_key,
-            ImageDescriptor::new(32, 32, ImageFormat::BGRA8, true),
-            ImageData::new(image_data),
+            image_descriptor,
+            image_data,
             None,
         );
 
         let bounds = (0, 0).to(512, 512);
         let info = LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -33,21 +33,25 @@ vec3 bilerp(vec3 a, vec3 b, vec3 c, vec3
     vec3 y = mix(c, d, t);
     return mix(x, y, s);
 }
 
 void main(void) {
     CompositeInstance ci = fetch_composite_instance();
     SplitGeometry geometry = fetch_split_geometry(ci.user_data0);
     AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
+
+    vec2 dest_origin = dest_task.render_target_origin -
+                       dest_task.screen_space_origin;
 
     vec3 world_pos = bilerp(geometry.points[0], geometry.points[1],
                             geometry.points[3], geometry.points[2],
                             aPosition.y, aPosition.x);
-    vec4 final_pos = vec4(world_pos.xy * uDevicePixelRatio, ci.z, 1.0);
+    vec4 final_pos = vec4((world_pos.xy + dest_origin) * uDevicePixelRatio, ci.z, 1.0);
 
     gl_Position = uTransform * final_pos;
 
     vec2 uv_origin = src_task.render_target_origin;
     vec2 uv_pos = uv_origin + world_pos.xy - src_task.screen_space_origin;
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vUv = vec3(uv_pos / texture_size, src_task.render_target_layer_index);
     vUvTaskBounds = vec4(uv_origin, uv_origin + src_task.size) / texture_size.xyxy;
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -59,26 +59,43 @@ void main(void) {
 
     vColor = vec4(text.color.rgb * text.color.a, text.color.a);
     vUv = vec3(mix(st0, st1, f), res.layer);
     vUvBorder = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
+
+#define MODE_ALPHA          0
+#define MODE_SUBPX_PASS0    1
+#define MODE_SUBPX_PASS1    2
+
 void main(void) {
     vec3 tc = vec3(clamp(vUv.xy, vUvBorder.xy, vUvBorder.zw), vUv.z);
-#ifdef WR_FEATURE_SUBPIXEL_AA
-    //note: the blend mode is not compatible with clipping
-    oFragColor = texture(sColor0, tc);
-#else
-    vec4 color = texture(sColor0, tc) * vColor;
+    vec4 color = texture(sColor0, tc);
+
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
-    float a = 0.0;
-    init_transform_fs(vLocalPos, a);
-    alpha *= a;
+    init_transform_fs(vLocalPos, alpha);
 #endif
-    alpha = min(alpha, do_clip());
-    oFragColor = color * alpha;
-#endif
+    alpha *= do_clip();
+
+    // TODO(gw): It would be worth profiling this and seeing
+    //           if we should instead handle the mode via
+    //           a combination of mix() etc. Branching on
+    //           a uniform is probably fast in most GPUs now though?
+    vec4 modulate_color = vec4(0.0);
+    switch (uMode) {
+        case MODE_ALPHA:
+            modulate_color = alpha * vColor;
+            break;
+        case MODE_SUBPX_PASS0:
+            modulate_color = vec4(alpha);
+            break;
+        case MODE_SUBPX_PASS1:
+            modulate_color = vColor;
+            break;
+    }
+
+    oFragColor = color * modulate_color;
 }
 #endif
--- a/gfx/webrender/res/shared.glsl
+++ b/gfx/webrender/res/shared.glsl
@@ -34,16 +34,20 @@
     // Attribute inputs
     in vec3 aPosition;
 #endif
 
 //======================================================================================
 // Fragment shader attributes and uniforms
 //======================================================================================
 #ifdef WR_FRAGMENT_SHADER
+    // A generic uniform that shaders can optionally use to configure
+    // an operation mode for this batch.
+    uniform int uMode;
+
     // Uniform inputs
 
     // Fragment shader outputs
     out vec4 oFragColor;
 #endif
 
 //======================================================================================
 // Shared shader uniforms
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -53,17 +53,17 @@ pub enum NodeType {
     /// Transforms it's content, but doesn't clip it. Can also be adjusted
     /// by scroll events or setting scroll offsets.
     ScrollFrame(ScrollingState),
 
     /// A special kind of node that adjusts its position based on the position
     /// of its parent node and a given set of sticky positioning constraints.
     /// Sticky positioned is described in the CSS Positioned Layout Module Level 3 here:
     /// https://www.w3.org/TR/css-position-3/#sticky-pos
-    StickyFrame(StickyFrameInfo),
+    StickyFrame(StickyFrameInfo, LayerVector2D),
 }
 
 /// Contains information common among all types of ClipScrollTree nodes.
 #[derive(Debug)]
 pub struct ClipScrollNode {
     /// Viewing rectangle in the coordinate system of the parent reference frame.
     pub local_viewport_rect: LayerRect,
 
@@ -189,17 +189,17 @@ impl ClipScrollNode {
             local_clip_rect: frame_rect,
             combined_local_viewport_rect: LayerRect::zero(),
             world_viewport_transform: LayerToWorldTransform::identity(),
             world_content_transform: LayerToWorldTransform::identity(),
             reference_frame_relative_scroll_offset: LayerVector2D::zero(),
             parent: Some(parent_id),
             children: Vec::new(),
             pipeline_id,
-            node_type: NodeType::StickyFrame(sticky_frame_info),
+            node_type: NodeType::StickyFrame(sticky_frame_info, LayerVector2D::zero()),
         }
     }
 
 
     pub fn add_child(&mut self, child: ClipId) {
         self.children.push(child);
     }
 
@@ -251,50 +251,48 @@ impl ClipScrollNode {
 
         scrolling.offset = new_offset;
         scrolling.bouncing_back = false;
         scrolling.started_bouncing_back = false;
         true
     }
 
     pub fn update_transform(&mut self, state: &TransformUpdateState) {
-        let scrolled_parent_combined_clip = state
-            .parent_combined_viewport_rect
-            .translate(&-state.parent_scroll_offset);
+        // We calculate this here to avoid a double-borrow later.
+        let sticky_offset = self.calculate_sticky_offset(
+            &state.nearest_scrolling_ancestor_offset,
+            &state.nearest_scrolling_ancestor_viewport,
+        );
 
         let (local_transform, accumulated_scroll_offset) = match self.node_type {
             NodeType::ReferenceFrame(ref info) => {
                 self.combined_local_viewport_rect = info.transform
                     .with_destination::<LayerPixel>()
-                    .inverse_rect_footprint(&scrolled_parent_combined_clip);
+                    .inverse_rect_footprint(&state.parent_combined_viewport_rect);
                 self.reference_frame_relative_scroll_offset = LayerVector2D::zero();
                 (info.transform, state.parent_accumulated_scroll_offset)
             }
             NodeType::Clip(_) | NodeType::ScrollFrame(_) => {
                 // Move the parent's viewport into the local space (of the node origin)
                 // and intersect with the local clip rectangle to get the local viewport.
-                self.combined_local_viewport_rect = scrolled_parent_combined_clip
+                self.combined_local_viewport_rect =
+                    state.parent_combined_viewport_rect
                     .intersection(&self.local_clip_rect)
                     .unwrap_or(LayerRect::zero());
                 self.reference_frame_relative_scroll_offset =
                     state.parent_accumulated_scroll_offset;
                 (
                     LayerToScrollTransform::identity(),
                     self.reference_frame_relative_scroll_offset,
                 )
             }
-            NodeType::StickyFrame(sticky_frame_info) => {
-                let sticky_offset = self.calculate_sticky_offset(
-                    &self.local_viewport_rect,
-                    &sticky_frame_info,
-                    &state.nearest_scrolling_ancestor_offset,
-                    &state.nearest_scrolling_ancestor_viewport,
-                );
-
-                self.combined_local_viewport_rect = scrolled_parent_combined_clip
+            NodeType::StickyFrame(_, ref mut node_sticky_offset) => {
+                *node_sticky_offset = sticky_offset;
+                self.combined_local_viewport_rect =
+                    state.parent_combined_viewport_rect
                     .translate(&-sticky_offset)
                     .intersection(&self.local_clip_rect)
                     .unwrap_or(LayerRect::zero());
                 self.reference_frame_relative_scroll_offset =
                     state.parent_accumulated_scroll_offset + sticky_offset;
                 (
                     LayerToScrollTransform::identity(),
                     self.reference_frame_relative_scroll_offset,
@@ -316,22 +314,25 @@ impl ClipScrollNode {
         // whatever scrolling offset we supply as well.
         let scroll_offset = self.scroll_offset();
         self.world_content_transform = self.world_viewport_transform
             .pre_translate(scroll_offset.to_3d());
     }
 
     fn calculate_sticky_offset(
         &self,
-        sticky_rect: &LayerRect,
-        sticky_frame_info: &StickyFrameInfo,
         viewport_scroll_offset: &LayerVector2D,
         viewport_rect: &LayerRect,
     ) -> LayerVector2D {
-        let sticky_rect = sticky_rect.translate(viewport_scroll_offset);
+        let sticky_frame_info = match self.node_type {
+            NodeType::StickyFrame(info, _) => info,
+            _ => return LayerVector2D::zero(),
+        };
+
+        let sticky_rect = self.local_viewport_rect.translate(viewport_scroll_offset);
         let mut sticky_offset = LayerVector2D::zero();
 
         if let Some(info) = sticky_frame_info.top {
             sticky_offset.y = viewport_rect.min_y() + info.margin - sticky_rect.min_y();
             sticky_offset.y = sticky_offset.y.max(0.0).min(info.max_offset);
         }
 
         if sticky_offset.y == 0.0 {
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -36,17 +36,16 @@ pub struct ClipScrollTree {
     /// A set of pipelines which should be discarded the next time this
     /// tree is drained.
     pub pipelines_to_discard: FastHashSet<PipelineId>,
 }
 
 pub struct TransformUpdateState {
     pub parent_reference_frame_transform: LayerToWorldTransform,
     pub parent_combined_viewport_rect: LayerRect,
-    pub parent_scroll_offset: LayerVector2D,
     pub parent_accumulated_scroll_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_viewport: LayerRect,
 }
 
 impl ClipScrollTree {
     pub fn new() -> ClipScrollTree {
         let dummy_pipeline = PipelineId::dummy();
@@ -307,17 +306,16 @@ impl ClipScrollTree {
         let root_viewport = self.nodes[&root_reference_frame_id].local_clip_rect;
         let state = TransformUpdateState {
             parent_reference_frame_transform: LayerToWorldTransform::create_translation(
                 pan.x,
                 pan.y,
                 0.0,
             ),
             parent_combined_viewport_rect: root_viewport,
-            parent_scroll_offset: LayerVector2D::zero(),
             parent_accumulated_scroll_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayerRect::zero(),
         };
         self.update_node_transform(root_reference_frame_id, &state);
     }
 
     fn update_node_transform(&mut self, layer_id: ClipId, state: &TransformUpdateState) {
@@ -333,37 +331,44 @@ impl ClipScrollTree {
             // The transformation we are passing is the transformation of the parent
             // reference frame and the offset is the accumulated offset of all the nodes
             // between us and the parent reference frame. If we are a reference frame,
             // we need to reset both these values.
             let state = match node.node_type {
                 NodeType::ReferenceFrame(ref info) => TransformUpdateState {
                     parent_reference_frame_transform: node.world_viewport_transform,
                     parent_combined_viewport_rect: node.combined_local_viewport_rect,
-                    parent_scroll_offset: LayerVector2D::zero(),
                     parent_accumulated_scroll_offset: LayerVector2D::zero(),
                     nearest_scrolling_ancestor_viewport: state
                         .nearest_scrolling_ancestor_viewport
                         .translate(&info.origin_in_parent_reference_frame),
                     ..*state
                 },
-                NodeType::Clip(..) | NodeType::StickyFrame(..) => TransformUpdateState {
+                NodeType::Clip(..) => TransformUpdateState {
                     parent_combined_viewport_rect: node.combined_local_viewport_rect,
-                    parent_scroll_offset: LayerVector2D::zero(),
                     ..*state
                 },
                 NodeType::ScrollFrame(ref scrolling) => TransformUpdateState {
-                    parent_combined_viewport_rect: node.combined_local_viewport_rect,
-                    parent_scroll_offset: scrolling.offset,
+                    parent_combined_viewport_rect:
+                        node.combined_local_viewport_rect.translate(&-scrolling.offset),
                     parent_accumulated_scroll_offset: scrolling.offset +
                         state.parent_accumulated_scroll_offset,
                     nearest_scrolling_ancestor_offset: scrolling.offset,
                     nearest_scrolling_ancestor_viewport: node.local_viewport_rect,
                     ..*state
                 },
+                NodeType::StickyFrame(_, sticky_offset) => TransformUpdateState {
+                    // We don't translate the combined rect by the sticky offset, because sticky
+                    // offsets actually adjust the node position itself, whereas scroll offsets
+                    // only apply to contents inside the node.
+                    parent_combined_viewport_rect: node.combined_local_viewport_rect,
+                    parent_accumulated_scroll_offset:
+                        sticky_offset + state.parent_accumulated_scroll_offset,
+                    ..*state
+                }
             };
 
             (state, node.children.clone())
         };
 
         for child_layer_id in node_children {
             self.update_node_transform(child_layer_id, &state);
         }
@@ -474,19 +479,20 @@ impl ClipScrollTree {
             NodeType::ReferenceFrame(ref info) => {
                 pt.new_level(format!("ReferenceFrame {:?}", info.transform));
             }
             NodeType::ScrollFrame(scrolling_info) => {
                 pt.new_level(format!("ScrollFrame"));
                 pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
                 pt.add_item(format!("scroll.offset: {:?}", scrolling_info.offset));
             }
-            NodeType::StickyFrame(sticky_frame_info) => {
+            NodeType::StickyFrame(sticky_frame_info, sticky_offset) => {
                 pt.new_level(format!("StickyFrame"));
                 pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
+                pt.add_item(format!("sticky offset: {:?}", sticky_offset));
             }
         }
 
         pt.add_item(format!(
             "local_viewport_rect: {:?}",
             node.local_viewport_rect
         ));
         pt.add_item(format!("local_clip_rect: {:?}", node.local_clip_rect));
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -275,44 +275,44 @@ impl DebugRenderer {
             0.0,
             ORTHO_NEAR_PLANE,
             ORTHO_FAR_PLANE,
         );
 
         // Triangles
         if !self.tri_vertices.is_empty() {
             device.bind_program(&self.color_program);
-            device.set_uniforms(&self.color_program, &projection);
+            device.set_uniforms(&self.color_program, &projection, 0);
             device.bind_vao(&self.tri_vao);
             device.update_vao_indices(&self.tri_vao, &self.tri_indices, VertexUsageHint::Dynamic);
             device.update_vao_main_vertices(
                 &self.tri_vao,
                 &self.tri_vertices,
                 VertexUsageHint::Dynamic,
             );
             device.draw_triangles_u32(0, self.tri_indices.len() as i32);
         }
 
         // Lines
         if !self.line_vertices.is_empty() {
             device.bind_program(&self.color_program);
-            device.set_uniforms(&self.color_program, &projection);
+            device.set_uniforms(&self.color_program, &projection, 0);
             device.bind_vao(&self.line_vao);
             device.update_vao_main_vertices(
                 &self.line_vao,
                 &self.line_vertices,
                 VertexUsageHint::Dynamic,
             );
             device.draw_nonindexed_lines(0, self.line_vertices.len() as i32);
         }
 
         // Glyph
         if !self.font_indices.is_empty() {
             device.bind_program(&self.font_program);
-            device.set_uniforms(&self.font_program, &projection);
+            device.set_uniforms(&self.font_program, &projection, 0);
             device.bind_texture(DebugSampler::Font, &self.font_texture);
             device.bind_vao(&self.font_vao);
             device.update_vao_indices(&self.font_vao, &self.font_indices, VertexUsageHint::Dynamic);
             device.update_vao_main_vertices(
                 &self.font_vao,
                 &self.font_vertices,
                 VertexUsageHint::Dynamic,
             );
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
-use api::{ColorF, ImageFormat};
+use api::ImageFormat;
 use api::{DeviceIntRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::RenderTargetMode;
 use std::fs::File;
 use std::io::Read;
 use std::iter::repeat;
 use std::mem;
@@ -425,16 +425,17 @@ impl Drop for Texture {
         debug_assert!(thread::panicking() || self.id == 0);
     }
 }
 
 pub struct Program {
     id: gl::GLuint,
     u_transform: gl::GLint,
     u_device_pixel_ratio: gl::GLint,
+    u_mode: gl::GLint,
 }
 
 impl Drop for Program {
     fn drop(&mut self) {
         debug_assert!(
             thread::panicking() || self.id == 0,
             "renderer::deinit not called"
         );
@@ -1407,21 +1408,23 @@ impl Device {
                 error_log
             );
             self.gl.delete_program(pid);
             return Err(ShaderError::Link(base_filename.to_string(), error_log));
         }
 
         let u_transform = self.gl.get_uniform_location(pid, "uTransform");
         let u_device_pixel_ratio = self.gl.get_uniform_location(pid, "uDevicePixelRatio");
+        let u_mode = self.gl.get_uniform_location(pid, "uMode");
 
         let program = Program {
             id: pid,
             u_transform,
             u_device_pixel_ratio,
+            u_mode,
         };
 
         self.bind_program(&program);
 
         Ok(program)
     }
 
     pub fn bind_shader_samplers<S>(&mut self, program: &Program, bindings: &[(&'static str, S)])
@@ -1443,22 +1446,29 @@ impl Device {
     }
 
     pub fn set_uniform_2f(&self, uniform: UniformLocation, x: f32, y: f32) {
         debug_assert!(self.inside_frame);
         let UniformLocation(location) = uniform;
         self.gl.uniform_2f(location, x, y);
     }
 
-    pub fn set_uniforms(&self, program: &Program, transform: &Transform3D<f32>) {
+    pub fn set_uniforms(
+        &self,
+        program: &Program,
+        transform: &Transform3D<f32>,
+        mode: i32,
+    ) {
         debug_assert!(self.inside_frame);
         self.gl
             .uniform_matrix_4fv(program.u_transform, false, &transform.to_row_major_array());
         self.gl
             .uniform_1f(program.u_device_pixel_ratio, self.device_pixel_ratio);
+        self.gl
+            .uniform_1i(program.u_mode, mode);
     }
 
     pub fn create_pbo(&mut self) -> PBO {
         let id = self.gl.gen_buffers(1)[0];
         PBO { id }
     }
 
     pub fn delete_pbo(&mut self, mut pbo: PBO) {
@@ -1842,37 +1852,37 @@ impl Device {
             gl::SRC_ALPHA,
             gl::ONE_MINUS_SRC_ALPHA,
             gl::ONE,
             gl::ONE_MINUS_SRC_ALPHA,
         );
         self.gl.blend_equation(gl::FUNC_ADD);
     }
 
-    pub fn set_blend_mode_subpixel(&self, color: ColorF) {
-        self.gl.blend_color(color.r, color.g, color.b, color.a);
-        self.gl
-            .blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
-    }
-
     pub fn set_blend_mode_multiply(&self) {
         self.gl
             .blend_func_separate(gl::ZERO, gl::SRC_COLOR, gl::ZERO, gl::SRC_ALPHA);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
     pub fn set_blend_mode_max(&self) {
         self.gl
             .blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE);
         self.gl.blend_equation_separate(gl::MAX, gl::FUNC_ADD);
     }
     pub fn set_blend_mode_min(&self) {
         self.gl
             .blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE);
         self.gl.blend_equation_separate(gl::MIN, gl::FUNC_ADD);
     }
+    pub fn set_blend_mode_subpixel_pass0(&self) {
+        self.gl.blend_func(gl::ZERO, gl::ONE_MINUS_SRC_COLOR);
+    }
+    pub fn set_blend_mode_subpixel_pass1(&self) {
+        self.gl.blend_func(gl::ONE, gl::ONE);
+    }
 }
 
 /// return (gl_internal_format, gl_format)
 fn gl_texture_formats_for_image_format(
     gl: &gl::Gl,
     format: ImageFormat,
 ) -> (gl::GLint, gl::GLuint) {
     match format {
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,8 +1,9 @@
+
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
 use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp, HitTestFlags};
 use api::{HitTestResult, ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutSize, LayoutTransform};
@@ -682,18 +683,18 @@ impl Frame {
             }
             SpecificDisplayItem::PushShadow(shadow) => {
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = LayerRect::zero();
                 context
                     .builder
                     .push_shadow(shadow, clip_and_scroll, &prim_info);
             }
-            SpecificDisplayItem::PopShadow => {
-                context.builder.pop_shadow();
+            SpecificDisplayItem::PopAllShadows => {
+                context.builder.pop_all_shadows();
             }
         }
         None
     }
 
     fn flatten_root<'a>(
         &mut self,
         traversal: &mut BuiltDisplayListIter<'a>,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderDetails, BorderDisplayItem, BorderRadius, BoxShadowClipMode, BuiltDisplayList};
 use api::{ClipAndScrollInfo, ClipId, ColorF};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExtendMode, FIND_ALL, FilterOp, FontInstance, FontRenderMode};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
-use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
+use api::{LayerPixel, LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, POINT_RELATIVE_TO_PIPELINE_VIEWPORT, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
 use api::{WorldPixel, WorldPoint, YuvColorSpace, YuvData, device_length};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipMode, ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
@@ -97,17 +97,21 @@ pub struct FrameBuilder {
     stacking_context_store: Vec<StackingContext>,
     clip_scroll_group_store: Vec<ClipScrollGroup>,
     // Note: value here is meant to be `ClipScrollGroupIndex`,
     // but we already have `ClipAndScrollInfo` in the key
     clip_scroll_group_indices: FastHashMap<ClipAndScrollInfo, usize>,
     packed_layers: Vec<PackedLayer>,
 
     // A stack of the current shadow primitives.
-    shadow_prim_stack: Vec<PrimitiveIndex>,
+    // The sub-Vec stores a buffer of fast-path primitives to be appended on pop.
+    shadow_prim_stack: Vec<(PrimitiveIndex, Vec<(PrimitiveIndex, ClipAndScrollInfo)>)>,
+    // If we're doing any fast-path shadows, we buffer the "real"
+    // content here, to be appended when the shadow stack is empty.
+    pending_shadow_contents: Vec<(PrimitiveIndex, ClipAndScrollInfo, LayerPrimitiveInfo)>,
 
     scrollbar_prims: Vec<ScrollbarPrimitive>,
 
     /// A stack of scroll nodes used during display list processing to properly
     /// parent new scroll nodes.
     reference_frame_stack: Vec<ClipId>,
 
     /// A stack of stacking contexts used for creating ClipScrollGroups as
@@ -215,16 +219,17 @@ impl FrameBuilder {
             Some(prev) => FrameBuilder {
                 stacking_context_store: recycle_vec(prev.stacking_context_store),
                 clip_scroll_group_store: recycle_vec(prev.clip_scroll_group_store),
                 clip_scroll_group_indices: FastHashMap::default(),
                 cmds: recycle_vec(prev.cmds),
                 hit_testing_runs: recycle_vec(prev.hit_testing_runs),
                 packed_layers: recycle_vec(prev.packed_layers),
                 shadow_prim_stack: recycle_vec(prev.shadow_prim_stack),
+                pending_shadow_contents: recycle_vec(prev.pending_shadow_contents),
                 scrollbar_prims: recycle_vec(prev.scrollbar_prims),
                 reference_frame_stack: recycle_vec(prev.reference_frame_stack),
                 stacking_context_stack: recycle_vec(prev.stacking_context_stack),
                 prim_store: prev.prim_store.recycle(),
                 clip_store: prev.clip_store.recycle(),
                 screen_size,
                 background_color,
                 config,
@@ -233,16 +238,17 @@ impl FrameBuilder {
             None => FrameBuilder {
                 stacking_context_store: Vec::new(),
                 clip_scroll_group_store: Vec::new(),
                 clip_scroll_group_indices: FastHashMap::default(),
                 cmds: Vec::new(),
                 hit_testing_runs: Vec::new(),
                 packed_layers: Vec::new(),
                 shadow_prim_stack: Vec::new(),
+                pending_shadow_contents: Vec::new(),
                 scrollbar_prims: Vec::new(),
                 reference_frame_stack: Vec::new(),
                 stacking_context_stack: Vec::new(),
                 prim_store: PrimitiveStore::new(),
                 clip_store: ClipStore::new(),
                 screen_size,
                 background_color,
                 config,
@@ -586,40 +592,59 @@ impl FrameBuilder {
         info: &LayerPrimitiveInfo,
     ) {
         let prim = PicturePrimitive::new_shadow(shadow);
 
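-        // Create an empty shadow primitive. Insert it into
-        // the draw lists immediately so that it will be drawn
-        // before any visual text elements that are added as
-        // part of this shadow context.
+        // Create an empty shadow primitive. It heads this shadow's
+        // pending list, which pop_all_shadows flushes to the draw
+        // list ahead of the visual elements that are added as
+        // part of this shadow context.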
-        let prim_index = self.add_primitive(
+        let prim_index = self.create_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Picture(prim),
         );
 
-        self.shadow_prim_stack.push(prim_index);
+        let pending = vec![(prim_index, clip_and_scroll)];
+        self.shadow_prim_stack.push((prim_index, pending));
     }
 
-    pub fn pop_shadow(&mut self) {
-        let prim_index = self.shadow_prim_stack
-            .pop()
-            .expect("invalid shadow push/pop count");
+    pub fn pop_all_shadows(&mut self) {
+        assert!(!self.shadow_prim_stack.is_empty(), "popped shadows, but none were present");
+
+        // Move the stack out of `self` so the loop body can borrow `self` mutably.
+        let mut shadows = mem::replace(&mut self.shadow_prim_stack, Vec::new());
+        for (prim_index, pending_primitives) in shadows.drain(..) {
+            {
+                // By now, the local rect of the text shadow has been calculated. It
+                // is calculated as the items in the shadow are added. It's now
+                // safe to offset the local rect by the offset of the shadow, which
+                // is then used when blitting the shadow to the final location.
+                let metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
+                let prim = &self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
+                let shadow = prim.as_shadow();
 
-        // By now, the local rect of the text shadow has been calculated. It
-        // is calculated as the items in the shadow are added. It's now
-        // safe to offset the local rect by the offset of the shadow, which
-        // is then used when blitting the shadow to the final location.
-        let metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
-        let prim = &self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
-        let shadow = prim.as_shadow();
+                metadata.local_rect = metadata.local_rect.translate(&shadow.offset);
+            }
+
+            // Push the shadow primitive and its buffered fast-path shadows now
+            for (prim_index, clip_and_scroll) in pending_primitives {
+                self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+            }
+        }
 
-        metadata.local_rect = metadata.local_rect.translate(&shadow.offset);
+        let mut pending_primitives = mem::replace(&mut self.pending_shadow_contents, Vec::new());
+        for (prim_index, clip_and_scroll, info) in pending_primitives.drain(..) {
+            self.add_primitive_to_hit_testing_list(&info, clip_and_scroll);
+            self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+        }
+        // Hand the drained buffers back so their allocations are reused.
+        self.pending_shadow_contents = pending_primitives;
+        self.shadow_prim_stack = shadows;
     }
 
     pub fn add_solid_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         color: &ColorF,
         flags: PrimitiveFlags,
@@ -677,52 +702,58 @@ impl FrameBuilder {
 
         let line = LinePrimitive {
             color: *color,
             style: style,
             orientation: orientation,
         };
 
         let mut fast_shadow_prims = Vec::new();
-        for shadow_prim_index in &self.shadow_prim_stack {
+        for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
             let picture = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             let shadow = picture.as_shadow();
             if shadow.blur_radius == 0.0 {
-                fast_shadow_prims.push(shadow.clone());
+                fast_shadow_prims.push((idx, shadow.clone()));
             }
         }
-        for shadow in fast_shadow_prims {
+
+        for (idx, shadow) in fast_shadow_prims {
             let mut line = line.clone();
             line.color = shadow.color;
             let mut info = info.clone();
             info.rect = new_rect.translate(&shadow.offset);
-            self.add_primitive(
+            let prim_index = self.create_primitive(
                 clip_and_scroll,
                 &info,
                 Vec::new(),
                 PrimitiveContainer::Line(line),
             );
+            self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
         }
 
         let mut info = info.clone();
         info.rect = new_rect;
         let prim_index = self.create_primitive(
             clip_and_scroll,
             &info,
             Vec::new(),
             PrimitiveContainer::Line(line),
         );
 
         if color.a > 0.0 {
-            self.add_primitive_to_hit_testing_list(&info, clip_and_scroll);
-            self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+            if self.shadow_prim_stack.is_empty() {
+                self.add_primitive_to_hit_testing_list(&info, clip_and_scroll);
+                self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+            } else {
+                self.pending_shadow_contents.push((prim_index, clip_and_scroll, info));
+            }
         }
 
-        for shadow_prim_index in &self.shadow_prim_stack {
+        for &(shadow_prim_index, _) in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
             debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
             let picture =
                 &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             let blur_radius = picture.as_shadow().blur_radius;
 
             // Only run real blurs here (fast path zero blurs are handled above).
             if blur_radius > 0.0 {
@@ -1137,44 +1168,40 @@ impl FrameBuilder {
             .limit_by(font.render_mode);
         if let Some(options) = glyph_options {
             render_mode = render_mode.limit_by(options.render_mode);
         }
 
         // There are some conditions under which we can't use
         // subpixel text rendering, even if enabled.
         if render_mode == FontRenderMode::Subpixel {
-            if color.a != 1.0 {
-                render_mode = FontRenderMode::Alpha;
-            }
-
             // text on a stacking context that has filters
             // (e.g. opacity) can't use sub-pixel.
             // TODO(gw): It's possible we can relax this in
             //           the future, if we modify the way
             //           we handle subpixel blending.
             if let Some(sc_index) = self.stacking_context_stack.last() {
                 let stacking_context = &self.stacking_context_store[sc_index.0];
-                if stacking_context.composite_ops.count() > 0 {
+                if !stacking_context.allow_subpixel_aa {
                     render_mode = FontRenderMode::Alpha;
                 }
             }
         }
 
         let prim_font = FontInstance::new(
             font.font_key,
             font.size,
             *color,
             render_mode,
             font.subpx_dir,
             font.platform_options,
             font.variations.clone(),
             font.synthetic_italics,
         );
-        let prim = TextRunPrimitiveCpu {
+        let mut prim = TextRunPrimitiveCpu {
             font: prim_font,
             glyph_range,
             glyph_count,
             glyph_gpu_blocks: Vec::new(),
             glyph_keys: Vec::new(),
             offset: run_offset,
         };
 
@@ -1182,68 +1209,80 @@ impl FrameBuilder {
         // text elements to get pixel perfect results for reftests. It's also a big
         // performance win to avoid blurs and render target allocations where
         // possible. For any text shadows that have zero blur, create a normal text
         // primitive with the shadow's color and offset. These need to be added
         // *before* the visual text primitive in order to get the correct paint
         // order. Store them in a Vec first to work around borrowck issues.
         // TODO(gw): Refactor to avoid having to store them in a Vec first.
         let mut fast_shadow_prims = Vec::new();
-        for shadow_prim_index in &self.shadow_prim_stack {
+        for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
             let picture_prim = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             let shadow = picture_prim.as_shadow();
             if shadow.blur_radius == 0.0 {
                 let mut text_prim = prim.clone();
                 text_prim.font.color = shadow.color.into();
                 // If we have translucent text, we need to ensure it won't go
                 // through the subpixel blend mode, which doesn't work with
                 // traditional alpha blending.
                 if shadow.color.a != 1.0 {
                     text_prim.font.render_mode = text_prim.font.render_mode.limit_by(FontRenderMode::Alpha);
                 }
                 text_prim.offset += shadow.offset;
-                fast_shadow_prims.push(text_prim);
+                fast_shadow_prims.push((idx, text_prim));
             }
         }
-        for text_prim in fast_shadow_prims {
+
+        for (idx, text_prim) in fast_shadow_prims {
             let rect = info.rect;
             let mut info = info.clone();
             info.rect = rect.translate(&text_prim.offset);
-            self.add_primitive(
+            let prim_index = self.create_primitive(
                 clip_and_scroll,
                 &info,
                 Vec::new(),
                 PrimitiveContainer::TextRun(text_prim),
             );
+            self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
+        }
+
+        // We defer this until after fast-shadows so that shadows of transparent text
+        // get subpixel-aa
+        if color.a != 1.0 {
+            prim.font.render_mode = FontRenderMode::Alpha;
         }
 
         // Create (and add to primitive store) the primitive that will be
         // used for both the visual element and also the shadow(s).
         let prim_index = self.create_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::TextRun(prim),
         );
 
         // Only add a visual element if it can contribute to the scene.
         if color.a > 0.0 {
-            self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
-            self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+            if self.shadow_prim_stack.is_empty() {
+                self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
+                self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+            } else {
+                self.pending_shadow_contents.push((prim_index, clip_and_scroll, *info));
+            }
         }
 
         // Now add this primitive index to all the currently active text shadow
         // primitives. Although we're adding the indices *after* the visual
         // primitive here, they will still draw before the visual text, since
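-        // the shadow primitive itself has been added to the draw cmd
-        // list *before* the visual element, during push_shadow. We need
+        // pop_all_shadows adds each shadow primitive to the draw cmd
+        // list *before* the deferred visual elements. We need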
         // the primitive index of the visual element here before we can add
         // the indices as sub-primitives to the shadow primitives.
-        for shadow_prim_index in &self.shadow_prim_stack {
+        for &(shadow_prim_index, _) in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
             debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
             let picture_prim =
                 &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
 
             // Only run real blurs here (fast path zero blurs are handled above).
             let blur_radius = picture_prim.as_shadow().blur_radius;
             if blur_radius > 0.0 {
@@ -1726,30 +1765,41 @@ impl FrameBuilder {
                 &mut self.clip_store,
             ) {
                 stacking_context.screen_bounds = stacking_context
                     .screen_bounds
                     .union(&prim_geom.device_rect);
                 stacking_context.isolated_items_bounds = stacking_context
                     .isolated_items_bounds
                     .union(&prim_geom.local_rect);
+                stacking_context.has_any_primitive = true;
 
                 profile_counters.visible_primitives.inc();
             }
         }
 
         true //visible
     }
 
-    fn handle_pop_stacking_context(&mut self, screen_rect: &DeviceIntRect) {
+    fn handle_pop_stacking_context(
+        &mut self,
+        screen_rect: &DeviceIntRect,
+        clip_scroll_tree: &ClipScrollTree) {
         let stacking_context_index = self.stacking_context_stack.pop().unwrap();
 
         let (bounding_rect, is_visible, is_preserve_3d, reference_id, reference_bounds) = {
             let stacking_context =
                 &mut self.stacking_context_store[stacking_context_index.0];
+            if !stacking_context.has_any_primitive {
+                stacking_context.isolated_items_bounds = stacking_context.children_sc_bounds;
+            } else if stacking_context.isolation != ContextIsolation::Items {
+                stacking_context.isolated_items_bounds = stacking_context
+                    .isolated_items_bounds
+                    .union(&stacking_context.children_sc_bounds);
+            }
             stacking_context.screen_bounds = stacking_context
                 .screen_bounds
                 .intersection(screen_rect)
                 .unwrap_or(DeviceIntRect::zero());
             (
                 stacking_context.screen_bounds.clone(),
                 stacking_context.is_visible,
                 stacking_context.isolation == ContextIsolation::Items,
@@ -1758,19 +1808,31 @@ impl FrameBuilder {
                     .isolated_items_bounds
                     .translate(&stacking_context.reference_frame_offset),
             )
         };
 
         if let Some(ref mut parent_index) = self.stacking_context_stack.last_mut() {
             let parent = &mut self.stacking_context_store[parent_index.0];
             parent.screen_bounds = parent.screen_bounds.union(&bounding_rect);
+            let child_bounds = reference_bounds.translate(&-parent.reference_frame_offset);
+            let frame_node = clip_scroll_tree
+                .nodes
+                .get(&reference_id)
+                .unwrap();
+            let local_transform = match frame_node.node_type {
+                NodeType::ReferenceFrame(ref info) => info.transform,
+                _ => LayerToScrollTransform::identity(),
+            };
+            let transformed_bounds = local_transform
+                .with_destination::<LayerPixel>()
+                .transform_rect(&child_bounds);
+            parent.children_sc_bounds = parent.children_sc_bounds.union(&transformed_bounds);
             // add children local bounds only for non-item-isolated contexts
             if !is_preserve_3d && parent.reference_frame_id == reference_id {
-                let child_bounds = reference_bounds.translate(&-parent.reference_frame_offset);
                 parent.isolated_items_bounds = parent.isolated_items_bounds.union(&child_bounds);
             }
             // Per-primitive stacking context visibility checks do not take into account
             // visibility of child stacking contexts, so do that now.
             parent.is_visible = parent.is_visible || is_visible;
         }
     }
 
@@ -1912,17 +1974,17 @@ impl FrameBuilder {
                         pipelines,
                         clip_scroll_tree,
                         screen_rect,
                         device_pixel_ratio,
                         profile_counters,
                     );
                 }
                 PrimitiveRunCmd::PopStackingContext => {
-                    self.handle_pop_stacking_context(screen_rect);
+                    self.handle_pop_stacking_context(screen_rect, clip_scroll_tree);
                 }
             }
         }
 
         mem::replace(&mut self.cmds, commands);
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -710,12 +710,12 @@ impl ToDebugString for SpecificDisplayIt
             SpecificDisplayItem::PushStackingContext(..) => String::from("push_stacking_context"),
             SpecificDisplayItem::Iframe(..) => String::from("iframe"),
             SpecificDisplayItem::Clip(..) => String::from("clip"),
             SpecificDisplayItem::ScrollFrame(..) => String::from("scroll_frame"),
             SpecificDisplayItem::StickyFrame(..) => String::from("sticky_frame"),
             SpecificDisplayItem::SetGradientStops => String::from("set_gradient_stops"),
             SpecificDisplayItem::PopStackingContext => String::from("pop_stacking_context"),
             SpecificDisplayItem::PushShadow(..) => String::from("push_shadow"),
-            SpecificDisplayItem::PopShadow => String::from("pop_shadow"),
+            SpecificDisplayItem::PopAllShadows => String::from("pop_all_shadows"),
         }
     }
 }
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -5,17 +5,17 @@
 //! The webrender API.
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{channel, BlobImageRenderer, FontRenderMode};
-use api::{ColorF, ColorU, Epoch, PipelineId, RenderApiSender, RenderNotifier};
+use api::{ColorF, Epoch, PipelineId, RenderApiSender, RenderNotifier};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageId, ExternalImageType, ImageFormat};
 use api::{YUV_COLOR_SPACES, YUV_FORMATS};
 use api::{YuvColorSpace, YuvFormat};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
@@ -209,16 +209,33 @@ bitflags! {
     pub struct DebugFlags: u32 {
         const PROFILER_DBG      = 1 << 0;
         const RENDER_TARGET_DBG = 1 << 1;
         const TEXTURE_CACHE_DBG = 1 << 2;
         const ALPHA_PRIM_DBG    = 1 << 3;
     }
 }
 
+// A generic mode that can be passed to shaders to change
+// behaviour per draw-call.
+type ShaderMode = i32;
+
+#[repr(C)]
+enum TextShaderMode {
+    Alpha = 0,
+    SubpixelPass0 = 1,
+    SubpixelPass1 = 2,
+}
+
+impl From<TextShaderMode> for ShaderMode {
+    fn from(mode: TextShaderMode) -> ShaderMode {
+        mode as ShaderMode
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum TextureSampler {
     Color0,
     Color1,
     Color2,
     CacheA8,
     CacheRGBA8,
     ResourceCache,
@@ -620,19 +637,17 @@ impl SourceTextureResolver {
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BlendMode {
     None,
     Alpha,
     PremultipliedAlpha,
-
-    // Use the color of the text itself as a constant color blend factor.
-    Subpixel(ColorU),
+    Subpixel,
 }
 
 // Tracks the state of each row in the GPU cache texture.
 struct CacheRow {
     is_dirty: bool,
 }
 
 impl CacheRow {
@@ -838,17 +853,16 @@ impl VertexDataTexture {
 
     fn deinit(self, device: &mut Device) {
         device.delete_pbo(self.pbo);
         device.delete_texture(self.texture);
     }
 }
 
 const TRANSFORM_FEATURE: &str = "TRANSFORM";
-const SUBPIXEL_AA_FEATURE: &str = "SUBPIXEL_AA";
 const CLIP_FEATURE: &str = "CLIP";
 
 enum ShaderKind {
     Primitive,
     Cache(VertexArrayKind),
     ClipCache,
 }
 
@@ -876,31 +890,32 @@ impl LazilyCompiledShader {
 
         if precache {
             try!{ shader.get(device) };
         }
 
         Ok(shader)
     }
 
-    fn bind(
+    fn bind<M>(
         &mut self,
         device: &mut Device,
         projection: &Transform3D<f32>,
+        mode: M,
         renderer_errors: &mut Vec<RendererError>,
-    ) {
+    ) where M: Into<ShaderMode> {
         let program = match self.get(device) {
             Ok(program) => program,
             Err(e) => {
                 renderer_errors.push(RendererError::from(e));
                 return;
             }
         };
         device.bind_program(program);
-        device.set_uniforms(program, projection);
+        device.set_uniforms(program, projection, mode.into());
     }
 
     fn get(&mut self, device: &mut Device) -> Result<&Program, ShaderError> {
         if self.program.is_none() {
             let program = try!{
                 match self.kind {
                     ShaderKind::Primitive => {
                         create_prim_shader(self.name,
@@ -979,29 +994,30 @@ impl PrimitiveShader {
                                       &transform_features,
                                       device,
                                       precache)
         };
 
         Ok(PrimitiveShader { simple, transform })
     }
 
-    fn bind(
+    fn bind<M>(
         &mut self,
         device: &mut Device,
         transform_kind: TransformedRectKind,
         projection: &Transform3D<f32>,
+        mode: M,
         renderer_errors: &mut Vec<RendererError>,
-    ) {
+    ) where M: Into<ShaderMode> {
         match transform_kind {
             TransformedRectKind::AxisAligned => {
-                self.simple.bind(device, projection, renderer_errors)
+                self.simple.bind(device, projection, mode, renderer_errors)
             }
             TransformedRectKind::Complex => {
-                self.transform.bind(device, projection, renderer_errors)
+                self.transform.bind(device, projection, mode, renderer_errors)
             }
         }
     }
 
     fn deinit(self, device: &mut Device) {
         self.simple.deinit(device);
         self.transform.deinit(device);
     }
@@ -1123,17 +1139,16 @@ pub struct Renderer {
     // Most draw directly to the framebuffer, but some use inputs
     // from the cache shaders to draw. Specifically, the box
     // shadow primitive shader stretches the box shadow cache
     // output, and the cache_image shader blits the results of
     // a cache shader (e.g. blur) to the screen.
     ps_rectangle: PrimitiveShader,
     ps_rectangle_clip: PrimitiveShader,
     ps_text_run: PrimitiveShader,
-    ps_text_run_subpixel: PrimitiveShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_yuv_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_radial_gradient: PrimitiveShader,
     ps_box_shadow: PrimitiveShader,
@@ -1368,23 +1383,16 @@ impl Renderer {
 
         let ps_text_run = try!{
             PrimitiveShader::new("ps_text_run",
                                  &mut device,
                                  &[],
                                  options.precache_shaders)
         };
 
-        let ps_text_run_subpixel = try!{
-            PrimitiveShader::new("ps_text_run",
-                                 &mut device,
-                                 &[ SUBPIXEL_AA_FEATURE ],
-                                 options.precache_shaders)
-        };
-
         // All image configuration.
         let mut image_features = Vec::new();
         let mut ps_image: Vec<Option<PrimitiveShader>> = Vec::new();
         // PrimitiveShader is not clonable. Use push() to initialize the vec.
         for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
             ps_image.push(None);
         }
         for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
@@ -1730,17 +1738,16 @@ impl Renderer {
             cs_line,
             cs_blur,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
             ps_rectangle,
             ps_rectangle_clip,
             ps_text_run,
-            ps_text_run_subpixel,
             ps_image,
             ps_yuv_image,
             ps_border_corner,
             ps_border_edge,
             ps_box_shadow,
             ps_gradient,
             ps_angle_gradient,
             ps_radial_gradient,
@@ -2337,180 +2344,175 @@ impl Renderer {
         projection: &Transform3D<f32>,
         render_tasks: &RenderTaskTree,
         render_target: Option<(&Texture, i32)>,
         target_dimensions: DeviceUintSize,
     ) {
         let marker = match key.kind {
             BatchKind::Composite { .. } => {
                 self.ps_composite
-                    .bind(&mut self.device, projection, &mut self.renderer_errors);
+                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 GPU_TAG_PRIM_COMPOSITE
             }
             BatchKind::HardwareComposite => {
                 self.ps_hw_composite
-                    .bind(&mut self.device, projection, &mut self.renderer_errors);
+                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 GPU_TAG_PRIM_HW_COMPOSITE
             }
             BatchKind::SplitComposite => {
                 self.ps_split_composite.bind(
                     &mut self.device,
                     projection,
+                    0,
                     &mut self.renderer_errors,
                 );
                 GPU_TAG_PRIM_SPLIT_COMPOSITE
             }
             BatchKind::Blend => {
                 self.ps_blend
-                    .bind(&mut self.device, projection, &mut self.renderer_errors);
+                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 GPU_TAG_PRIM_BLEND
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
                 TransformBatchKind::Rectangle(needs_clipping) => {
                     debug_assert!(
                         !needs_clipping || match key.blend_mode {
                             BlendMode::Alpha |
                             BlendMode::PremultipliedAlpha |
-                            BlendMode::Subpixel(..) => true,
+                            BlendMode::Subpixel => true,
                             BlendMode::None => false,
                         }
                     );
 
                     if needs_clipping {
                         self.ps_rectangle_clip.bind(
                             &mut self.device,
                             transform_kind,
                             projection,
+                            0,
                             &mut self.renderer_errors,
                         );
                     } else {
                         self.ps_rectangle.bind(
                             &mut self.device,
                             transform_kind,
                             projection,
+                            0,
                             &mut self.renderer_errors,
                         );
                     }
                     GPU_TAG_PRIM_RECT
                 }
                 TransformBatchKind::Line => {
                     self.ps_line.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_LINE
                 }
                 TransformBatchKind::TextRun => {
-                    match key.blend_mode {
-                        BlendMode::Subpixel(..) => {
-                            self.ps_text_run_subpixel.bind(
-                                &mut self.device,
-                                transform_kind,
-                                projection,
-                                &mut self.renderer_errors,
-                            );
-                        }
-                        BlendMode::Alpha | BlendMode::PremultipliedAlpha | BlendMode::None => {
-                            self.ps_text_run.bind(
-                                &mut self.device,
-                                transform_kind,
-                                projection,
-                                &mut self.renderer_errors,
-                            );
-                        }
-                    };
-                    GPU_TAG_PRIM_TEXT_RUN
+                    unreachable!("bug: text batches are special cased");
                 }
                 TransformBatchKind::Image(image_buffer_kind) => {
                     self.ps_image[image_buffer_kind as usize]
                         .as_mut()
                         .expect("Unsupported image shader kind")
                         .bind(
                             &mut self.device,
                             transform_kind,
                             projection,
+                            0,
                             &mut self.renderer_errors,
                         );
                     GPU_TAG_PRIM_IMAGE
                 }
                 TransformBatchKind::YuvImage(image_buffer_kind, format, color_space) => {
                     let shader_index =
                         Renderer::get_yuv_shader_index(image_buffer_kind, format, color_space);
                     self.ps_yuv_image[shader_index]
                         .as_mut()
                         .expect("Unsupported YUV shader kind")
                         .bind(
                             &mut self.device,
                             transform_kind,
                             projection,
+                            0,
                             &mut self.renderer_errors,
                         );
                     GPU_TAG_PRIM_YUV_IMAGE
                 }
                 TransformBatchKind::BorderCorner => {
                     self.ps_border_corner.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_BORDER_CORNER
                 }
                 TransformBatchKind::BorderEdge => {
                     self.ps_border_edge.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_BORDER_EDGE
                 }
                 TransformBatchKind::AlignedGradient => {
                     self.ps_gradient.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_GRADIENT
                 }
                 TransformBatchKind::AngleGradient => {
                     self.ps_angle_gradient.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_ANGLE_GRADIENT
                 }
                 TransformBatchKind::RadialGradient => {
                     self.ps_radial_gradient.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_RADIAL_GRADIENT
                 }
                 TransformBatchKind::BoxShadow => {
                     self.ps_box_shadow.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_BOX_SHADOW
                 }
                 TransformBatchKind::CacheImage => {
                     self.ps_cache_image.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
+                        0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_CACHE_IMAGE
                 }
             },
         };
 
         // Handle special case readback for composites.
@@ -2633,17 +2635,17 @@ impl Renderer {
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
 
             self.device.set_blend(false);
             self.cs_blur
-                .bind(&mut self.device, projection, &mut self.renderer_errors);
+                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
 
             if !target.vertical_blurs.is_empty() {
                 self.draw_instanced_batch(
                     &target.vertical_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                 );
             }
@@ -2664,34 +2666,34 @@ impl Renderer {
         // it removes the overhead of submitting many small glyphs
         // to multiple tiles in the normal text run case.
         if !target.text_run_cache_prims.is_empty() {
             self.device.set_blend(true);
             self.device.set_blend_mode_alpha();
 
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_TEXT_RUN);
             self.cs_text_run
-                .bind(&mut self.device, projection, &mut self.renderer_errors);
+                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             for (texture_id, instances) in &target.text_run_cache_prims {
                 self.draw_instanced_batch(
                     instances,
                     VertexArrayKind::Primitive,
                     &BatchTextures::color(*texture_id),
                 );
             }
         }
         if !target.line_cache_prims.is_empty() {
             // TODO(gw): Technically, we don't need blend for solid
             //           lines. We could check that here?
             self.device.set_blend(true);
             self.device.set_blend_mode_alpha();
 
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_LINE);
             self.cs_line
-                .bind(&mut self.device, projection, &mut self.renderer_errors);
+                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
                 &target.line_cache_prims,
                 VertexArrayKind::Primitive,
                 &BatchTextures::no_texture(),
             );
         }
 
         //TODO: record the pixel count for cached primitives
@@ -2727,57 +2729,136 @@ impl Renderer {
                     target_size,
                 );
             }
 
             self.device.disable_depth_write();
             self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
 
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
-                if batch.key.blend_mode != prev_blend_mode {
-                    match batch.key.blend_mode {
-                        BlendMode::None => {
-                            self.device.set_blend(false);
-                        }
-                        BlendMode::Alpha => {
-                            self.device.set_blend(true);
-                            self.device.set_blend_mode_alpha();
-                        }
-                        BlendMode::PremultipliedAlpha => {
-                            self.device.set_blend(true);
-                            self.device.set_blend_mode_premultiplied_alpha();
-                        }
-                        BlendMode::Subpixel(color) => {
-                            self.device.set_blend(true);
-                            self.device.set_blend_mode_subpixel(color.into());
-                        }
-                    }
-                    prev_blend_mode = batch.key.blend_mode;
-                }
-
                 if self.debug_flags.contains(ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => ColorF::new(0.3, 0.3, 0.3, 1.0),
                         BlendMode::Alpha => ColorF::new(0.0, 0.9, 0.1, 1.0),
                         BlendMode::PremultipliedAlpha => ColorF::new(0.0, 0.3, 0.7, 1.0),
-                        BlendMode::Subpixel(_) => ColorF::new(0.5, 0.0, 0.4, 1.0),
+                        BlendMode::Subpixel => ColorF::new(0.5, 0.0, 0.4, 1.0),
                     }.into();
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
                     }
                 }
 
-                self.submit_batch(
-                    &batch.key,
-                    &batch.instances,
-                    &projection,
-                    render_tasks,
-                    render_target,
-                    target_size,
-                );
+                match batch.key.kind {
+                    BatchKind::Transformable(transform_kind, TransformBatchKind::TextRun) => {
+                        // Text run batches are handled by this special case branch.
+                        // In the case of subpixel text, we draw it as a two pass
+                        // effect, to ensure we can apply clip masks correctly.
+                        // In the future, there are several optimizations available:
+                        // 1) Use dual source blending where available (almost all recent hardware).
+                        // 2) Use frame buffer fetch where available (most modern hardware).
+                        // 3) Consider the old constant color blend method where no clip is applied.
+                        let _gm = self.gpu_profile.add_marker(GPU_TAG_PRIM_TEXT_RUN);
+
+                        self.device.set_blend(true);
+
+                        match batch.key.blend_mode {
+                            BlendMode::PremultipliedAlpha => {
+                                self.device.set_blend_mode_premultiplied_alpha();
+
+                                self.ps_text_run.bind(
+                                    &mut self.device,
+                                    transform_kind,
+                                    projection,
+                                    TextShaderMode::Alpha,
+                                    &mut self.renderer_errors,
+                                );
+
+                                self.draw_instanced_batch(
+                                    &batch.instances,
+                                    VertexArrayKind::Primitive,
+                                    &batch.key.textures
+                                );
+                            }
+                            BlendMode::Subpixel => {
+                                // Using the two pass component alpha rendering technique:
+                                //
+                                // http://anholt.livejournal.com/32058.html
+                                //
+                                self.device.set_blend_mode_subpixel_pass0();
+
+                                self.ps_text_run.bind(
+                                    &mut self.device,
+                                    transform_kind,
+                                    projection,
+                                    TextShaderMode::SubpixelPass0,
+                                    &mut self.renderer_errors,
+                                );
+
+                                self.draw_instanced_batch(
+                                    &batch.instances,
+                                    VertexArrayKind::Primitive,
+                                    &batch.key.textures
+                                );
+
+                                self.device.set_blend_mode_subpixel_pass1();
+
+                                self.ps_text_run.bind(
+                                    &mut self.device,
+                                    transform_kind,
+                                    projection,
+                                    TextShaderMode::SubpixelPass1,
+                                    &mut self.renderer_errors,
+                                );
+
+                                // When drawing the 2nd pass, we know that the VAO, textures etc
+                                // are all set up from the previous draw_instanced_batch call,
+                                // so just issue a draw call here to avoid re-uploading the
+                                // instances and re-binding textures etc.
+                                self.device
+                                    .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
+                            }
+                            BlendMode::Alpha | BlendMode::None => {
+                                unreachable!("bug: bad blend mode for text");
+                            }
+                        }
+
+                        prev_blend_mode = BlendMode::None;
+                        self.device.set_blend(false);
+                    }
+                    _ => {
+                        if batch.key.blend_mode != prev_blend_mode {
+                            match batch.key.blend_mode {
+                                BlendMode::None => {
+                                    self.device.set_blend(false);
+                                }
+                                BlendMode::Alpha => {
+                                    self.device.set_blend(true);
+                                    self.device.set_blend_mode_alpha();
+                                }
+                                BlendMode::PremultipliedAlpha => {
+                                    self.device.set_blend(true);
+                                    self.device.set_blend_mode_premultiplied_alpha();
+                                }
+                                BlendMode::Subpixel => {
+                                    unreachable!("bug: subpx text handled earlier");
+                                }
+                            }
+                            prev_blend_mode = batch.key.blend_mode;
+                        }
+
+                        self.submit_batch(
+                            &batch.key,
+                            &batch.instances,
+                            &projection,
+                            render_tasks,
+                            render_target,
+                            target_size,
+                        );
+                    }
+                }
             }
 
             self.device.disable_depth();
             self.device.set_blend(false);
             self.gpu_profile.done_sampler();
         }
 
         // For any registered image outputs on this render target,
@@ -2840,17 +2921,17 @@ impl Renderer {
                 .clear_target_rect(Some(clear_color), None, target.used_rect());
         }
 
         // Draw any box-shadow caches for this target.
         if !target.box_shadow_cache_prims.is_empty() {
             self.device.set_blend(false);
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_BOX_SHADOW);
             self.cs_box_shadow
-                .bind(&mut self.device, projection, &mut self.renderer_errors);
+                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
                 &target.box_shadow_cache_prims,
                 VertexArrayKind::CacheBoxShadow,
                 &BatchTextures::no_texture(),
             );
         }
 
         // Draw the clip items into the tiled alpha mask.
@@ -2859,17 +2940,17 @@ impl Renderer {
 
             // If we have border corner clips, the first step is to clear out the
             // area in the clip mask. This allows drawing multiple invididual clip
             // in regions below.
             if !target.clip_batcher.border_clears.is_empty() {
                 let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders [clear]");
                 self.device.set_blend(false);
                 self.cs_clip_border
-                    .bind(&mut self.device, projection, &mut self.renderer_errors);
+                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     &target.clip_batcher.border_clears,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                 );
             }
 
             // Draw any dots or dashes for border corners.
@@ -2877,17 +2958,17 @@ impl Renderer {
                 let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip borders");
                 // We are masking in parts of the corner (dots or dashes) here.
                 // Blend mode is set to max to allow drawing multiple dots.
                 // The individual dots and dashes in a border never overlap, so using
                 // a max blend mode here is fine.
                 self.device.set_blend(true);
                 self.device.set_blend_mode_max();
                 self.cs_clip_border
-                    .bind(&mut self.device, projection, &mut self.renderer_errors);
+                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     &target.clip_batcher.borders,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                 );
             }
 
             // switch to multiplicative blending
@@ -2895,16 +2976,17 @@ impl Renderer {
             self.device.set_blend_mode_multiply();
 
             // draw rounded cornered rectangles
             if !target.clip_batcher.rectangles.is_empty() {
                 let _gm2 = GpuMarker::new(self.device.rc_gl(), "clip rectangles");
                 self.cs_clip_rectangle.bind(
                     &mut self.device,
                     projection,
+                    0,
                     &mut self.renderer_errors,
                 );
                 self.draw_instanced_batch(
                     &target.clip_batcher.rectangles,
                     VertexArrayKind::Clip,
                     &BatchTextures::no_texture(),
                 );
             }
@@ -2914,17 +2996,17 @@ impl Renderer {
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
                         SourceTexture::Invalid,
                         SourceTexture::Invalid,
                     ],
                 };
                 self.cs_clip_image
-                    .bind(&mut self.device, projection, &mut self.renderer_errors);
+                    .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 self.draw_instanced_batch(items, VertexArrayKind::Clip, &textures);
             }
         }
 
         self.gpu_profile.done_sampler();
     }
 
     fn update_deferred_resolves(&mut self, frame: &mut Frame) {
@@ -3398,17 +3480,16 @@ impl Renderer {
         self.cs_line.deinit(&mut self.device);
         self.cs_blur.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_rectangle.deinit(&mut self.device);
         self.ps_rectangle_clip.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
-        self.ps_text_run_subpixel.deinit(&mut self.device);
         for shader in self.ps_image {
             if let Some(shader) = shader {
                 shader.deinit(&mut self.device);
             }
         }
         for shader in self.ps_yuv_image {
             if let Some(shader) = shader {
                 shader.deinit(&mut self.device);
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -50,17 +50,17 @@ impl AlphaBatchHelpers for PrimitiveStor
     ) -> BlendMode {
         let needs_blending = !metadata.opacity.is_opaque || metadata.clip_task_id.is_some() ||
             transform_kind == TransformedRectKind::Complex;
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                 match text_run_cpu.font.render_mode {
-                    FontRenderMode::Subpixel => BlendMode::Subpixel(text_run_cpu.font.color),
+                    FontRenderMode::Subpixel => BlendMode::Subpixel,
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
                     FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
             }
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
@@ -124,36 +124,59 @@ impl AlphaBatchList {
 
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         let mut selected_batch_index = None;
 
-        // Composites always get added to their own batch.
-        // This is because the result of a composite can affect
-        // the input to the next composite. Perhaps we can
-        // optimize this in the future.
         match key.kind {
-            BatchKind::Composite { .. } => {}
-            _ => 'outer: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10)
-            {
-                if batch.key.is_compatible_with(&key) {
-                    selected_batch_index = Some(batch_index);
-                    break;
-                }
+            BatchKind::Composite { .. } => {
+                // Composites always get added to their own batch.
+                // This is because the result of a composite can affect
+                // the input to the next composite. Perhaps we can
+                // optimize this in the future.
+            }
+            BatchKind::Transformable(_, TransformBatchKind::TextRun) => {
+                'outer_text: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
+                    // Subpixel text is drawn in two passes. Because of this, we need
+                    // to check for overlaps with every batch (which is a bit different
+                    // than the normal batching below).
+                    for item_rect in &batch.item_rects {
+                        if item_rect.intersects(item_bounding_rect) {
+                            break 'outer_text;
+                        }
+                    }
 
-                // check for intersections
-                for item_rect in &batch.item_rects {
-                    if item_rect.intersects(item_bounding_rect) {
-                        break 'outer;
+                    if batch.key.is_compatible_with(&key) {
+                        selected_batch_index = Some(batch_index);
+                        break;
                     }
                 }
-            },
+            }
+            _ => {
+                'outer_default: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
+                    // For normal batches, we only need to check for overlaps for batches
+                    // other than the first batch we consider. If the first batch
+                    // is compatible, then we know there isn't any potential overlap
+                    // issues to worry about.
+                    if batch.key.is_compatible_with(&key) {
+                        selected_batch_index = Some(batch_index);
+                        break;
+                    }
+
+                    // check for intersections
+                    for item_rect in &batch.item_rects {
+                        if item_rect.intersects(item_bounding_rect) {
+                            break 'outer_default;
+                        }
+                    }
+                }
+            }
         }
 
         if selected_batch_index.is_none() {
             let new_batch = AlphaPrimitiveBatch::new(key);
             selected_batch_index = Some(self.batches.len());
             self.batches.push(new_batch);
         }
 
@@ -224,17 +247,17 @@ impl BatchList {
 
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         match key.blend_mode {
             BlendMode::None => self.opaque_batch_list.get_suitable_batch(key),
-            BlendMode::Alpha | BlendMode::PremultipliedAlpha | BlendMode::Subpixel(..) => {
+            BlendMode::Alpha | BlendMode::PremultipliedAlpha | BlendMode::Subpixel => {
                 self.alpha_batch_list
                     .get_suitable_batch(key, item_bounding_rect)
             }
         }
     }
 
     fn finalize(&mut self) {
         self.opaque_batch_list.finalize()
@@ -1596,16 +1619,27 @@ pub struct StackingContext {
     pub is_pipeline_root: bool,
 
     /// Whether or not this stacking context has any visible components, calculated
     /// based on the size and position of all children and how they are clipped.
     pub is_visible: bool,
 
     /// Current stacking context visibility of backface.
     pub is_backface_visible: bool,
+
+    /// Allow subpixel AA for text runs on this stacking context.
+    /// This is a temporary hack while we don't support subpixel AA
+    /// on transparent stacking contexts.
+    pub allow_subpixel_aa: bool,
+
+    /// Indicates whether any primitive is contained in this stacking context.
+    pub has_any_primitive: bool,
+
+    /// Union of all stacking context bounds of all children.
+    pub children_sc_bounds: LayerRect,
 }
 
 impl StackingContext {
     pub fn new(
         pipeline_id: PipelineId,
         reference_frame_offset: LayerVector2D,
         is_page_root: bool,
         is_pipeline_root: bool,
@@ -1613,28 +1647,33 @@ impl StackingContext {
         transform_style: TransformStyle,
         composite_ops: CompositeOps,
         is_backface_visible: bool,
     ) -> StackingContext {
         let isolation = match transform_style {
             TransformStyle::Flat => ContextIsolation::None,
             TransformStyle::Preserve3D => ContextIsolation::Items,
         };
+        let allow_subpixel_aa = composite_ops.count() == 0 &&
+                                isolation == ContextIsolation::None;
         StackingContext {
             pipeline_id,
             reference_frame_offset,
             reference_frame_id,
             screen_bounds: DeviceIntRect::zero(),
             isolated_items_bounds: LayerRect::zero(),
             composite_ops,
             isolation,
             is_page_root,
             is_pipeline_root,
             is_visible: false,
             is_backface_visible,
+            allow_subpixel_aa,
+            has_any_primitive: false,
+            children_sc_bounds: LayerRect::zero(),
         }
     }
 
     pub fn can_contribute_to_scene(&self) -> bool {
         !self.composite_ops.will_make_invisible()
     }
 }
 
--- a/gfx/webrender/tests/angle_shader_validation.rs
+++ b/gfx/webrender/tests/angle_shader_validation.rs
@@ -99,17 +99,17 @@ const SHADERS: &[Shader] = &[
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_yuv_image",
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_text_run",
-        features: &["", "TRANSFORM", "SUBPIXEL_AA_FEATURE"],
+        features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_rectangle",
         features: &["", "TRANSFORM", "CLIP_FEATURE", "TRANSFORM,CLIP_FEATURE"],
     },
 ];
 
 const VERSION_STRING: &str = "#version 300 es\n";
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -6,22 +6,22 @@ license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 
 [dependencies]
 app_units = "0.5.6"
-bincode = "0.8"
+bincode = "0.9"
 bitflags = "0.9"
 byteorder = "1.0"
 euclid = "0.15"
 heapsize = ">= 0.3.6, < 0.5"
-ipc-channel = {version = "0.8", optional = true}
+ipc-channel = {version = "0.9", optional = true}
 serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
 core-graphics = "0.9"
 
 [target.'cfg(target_os = "windows")'.dependencies]
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -98,23 +98,22 @@ pub enum SpecificDisplayItem {
     BoxShadow(BoxShadowDisplayItem),
     Gradient(GradientDisplayItem),
     RadialGradient(RadialGradientDisplayItem),
     Iframe(IframeDisplayItem),
     PushStackingContext(PushStackingContextDisplayItem),
     PopStackingContext,
     SetGradientStops,
     PushShadow(Shadow),
-    PopShadow,
+    PopAllShadows,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ClipDisplayItem {
     pub id: ClipId,
-    pub parent_id: ClipId,
     pub image_mask: Option<ImageMask>,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct StickyFrameDisplayItem {
     pub id: ClipId,
     pub sticky_frame_info: StickyFrameInfo,
 }
@@ -132,17 +131,16 @@ pub struct StickySideConstraint {
 pub enum ScrollSensitivity {
     ScriptAndInputEvents,
     Script,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ScrollFrameDisplayItem {
     pub id: ClipId,
-    pub parent_id: ClipId,
     pub image_mask: Option<ImageMask>,
     pub scroll_sensitivity: ScrollSensitivity,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct RectangleDisplayItem {
     pub color: ColorF,
 }
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -12,19 +12,20 @@ use {LineDisplayItem, LineOrientation, L
 use {PropertyBinding, PushStackingContextDisplayItem, RadialGradient, RadialGradientDisplayItem};
 use {RectangleDisplayItem, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity};
 use {SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, StickyFrameInfo};
 use {TextDisplayItem, Shadow, TransformStyle, YuvColorSpace, YuvData};
 use YuvImageDisplayItem;
 use bincode;
 use serde::{Deserialize, Serialize, Serializer};
 use serde::ser::{SerializeMap, SerializeSeq};
-use std::io::Write;
+use std::io::{Read, Write};
 use std::{io, ptr};
 use std::marker::PhantomData;
+use std::slice;
 use time::precise_time_ns;
 
 // We don't want to push a long text-run. If a text-run is too long, split it into several parts.
 // Please check the renderer::MAX_VERTEX_TEXTURE_WIDTH for the detail.
 pub const MAX_TEXT_RUN_LENGTH: usize = 2040;
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
@@ -166,16 +167,17 @@ fn skip_slice<T: for<'de> Deserialize<'d
         _boo: PhantomData,
     };
 
     // Adjust data pointer to skip read values
     *data = &data[range.length ..];
     (range, count)
 }
 
+
 impl<'a> BuiltDisplayListIter<'a> {
     pub fn new(list: &'a BuiltDisplayList) -> Self {
         Self::new_with_list_and_data(list, list.item_slice())
     }
 
     pub fn new_with_list_and_data(list: &'a BuiltDisplayList, data: &'a [u8]) -> Self {
         BuiltDisplayListIter {
             list,
@@ -216,17 +218,17 @@ impl<'a> BuiltDisplayListIter<'a> {
         self.cur_stops = ItemRange::default();
         self.cur_complex_clip = (ItemRange::default(), 0);
 
         loop {
             if self.data.len() == 0 {
                 return None;
             }
 
-            self.cur_item = bincode::deserialize_from(&mut self.data, bincode::Infinite)
+            self.cur_item = bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
                 .expect("MEH: malicious process?");
 
             match self.cur_item.item {
                 SetGradientStops => {
                     self.cur_stops = skip_slice::<GradientStop>(self.list, &mut self.data).0;
 
                     // This is a dummy item, skip over it
                     continue;
@@ -358,17 +360,17 @@ impl<'a, 'b> DisplayItemRef<'a, 'b> {
     }
 }
 
 impl<'de, 'a, T: Deserialize<'de>> AuxIter<'a, T> {
     pub fn new(mut data: &'a [u8]) -> Self {
         let size: usize = if data.len() == 0 {
             0 // Accept empty ItemRanges pointing anywhere
         } else {
-            bincode::deserialize_from(&mut data, bincode::Infinite).expect("MEH: malicious input?")
+            bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite).expect("MEH: malicious input?")
         };
 
         AuxIter {
             data,
             size,
             _boo: PhantomData,
         }
     }
@@ -378,17 +380,17 @@ impl<'a, T: for<'de> Deserialize<'de>> I
     type Item = T;
 
     fn next(&mut self) -> Option<T> {
         if self.size == 0 {
             None
         } else {
             self.size -= 1;
             Some(
-                bincode::deserialize_from(&mut self.data, bincode::Infinite)
+                bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
                     .expect("MEH: malicious input?"),
             )
         }
     }
 
     fn size_hint(&self) -> (usize, Option<usize>) {
         (self.size, Some(self.size))
     }
@@ -520,16 +522,83 @@ fn serialize_fast<T: Serialize>(vec: &mu
 
     // fix up the length
     unsafe { vec.set_len(old_len + size.0); }
 
     // make sure we wrote the right amount
     debug_assert!(((w.0 as usize) - (vec.as_ptr() as usize)) == vec.len());
 }
 
+// This uses a (start, end) representation instead of (start, len) so that
+// we only need to update a single field as we read through it. This
+// makes it easier for llvm to understand what's going on. (https://github.com/rust-lang/rust/issues/45068)
+// We update the slice only once we're done reading
+struct UnsafeReader<'a: 'b, 'b> {
+    start: *const u8,
+    end: *const u8,
+    slice: &'b mut &'a [u8],
+}
+
+impl<'a, 'b> UnsafeReader<'a, 'b> {
+    #[inline(always)]
+    fn new(buf: &'b mut &'a [u8]) -> UnsafeReader<'a, 'b> {
+        unsafe {
+            let end = buf.as_ptr().offset(buf.len() as isize);
+            let start = buf.as_ptr();
+            UnsafeReader { start: start, end, slice: buf }
+        }
+    }
+
+    // This read implementation is significantly faster than the standard &[u8] one.
+    //
+    // First, it only supports reading exactly buf.len() bytes. This ensures that
+    // the argument to memcpy is always buf.len() and will allow a constant buf.len()
+    // to be propagated through to memcpy which LLVM will turn into explicit loads and
+    // stores. The standard implementation does a len = min(slice.len(), buf.len())
+    //
+    // Second, we only need to adjust 'start' after reading and it's only adjusted by a
+    // constant. This allows LLVM to avoid adjusting the length field after every read
+    // and lets it be aggregated into a single adjustment.
+    #[inline(always)]
+    fn read_internal(&mut self, buf: &mut [u8]) {
+        // this is safe because we panic if start + buf.len() > end
+        unsafe {
+            assert!(self.start.offset(buf.len() as isize) <= self.end, "UnsafeReader: read past end of target");
+            ptr::copy_nonoverlapping(self.start, buf.as_mut_ptr(), buf.len());
+            self.start = self.start.offset(buf.len() as isize);
+        }
+    }
+}
+
+impl<'a, 'b> Drop for UnsafeReader<'a, 'b> {
+    // this adjusts input slice so that it properly represents the amount that's left.
+    #[inline(always)]
+    fn drop(&mut self) {
+        // this is safe because we know that start and end are contained inside the original slice
+        unsafe {
+            *self.slice = slice::from_raw_parts(self.start, (self.end as usize) - (self.start as usize));
+        }
+    }
+}
+
+impl<'a, 'b> Read for UnsafeReader<'a, 'b> {
+    // These methods were not being inlined and we need them to be so that the memcpy
+    // is for a constant size
+    #[inline(always)]
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        self.read_internal(buf);
+        Ok(buf.len())
+    }
+    #[inline(always)]
+    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
+        self.read_internal(buf);
+        Ok(())
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct SaveState {
     dl_len: usize,
     clip_stack_len: usize,
     next_clip_id: u64,
 }
 
 #[derive(Clone)]
@@ -625,16 +694,32 @@ impl DisplayListBuilder {
             &DisplayItem {
                 item,
                 clip_and_scroll: *self.clip_stack.last().unwrap(),
                 info: *info,
             },
         )
     }
 
+    fn push_item_with_clip_scroll_info(
+        &mut self,
+        item: SpecificDisplayItem,
+        info: &LayoutPrimitiveInfo,
+        scrollinfo: ClipAndScrollInfo
+    ) {
+        serialize_fast(
+            &mut self.data,
+            &DisplayItem {
+                item,
+                clip_and_scroll: scrollinfo,
+                info: *info,
+            },
+        )
+    }
+
     fn push_new_empty_item(&mut self, item: SpecificDisplayItem) {
         let info = LayoutPrimitiveInfo::new(LayoutRect::zero());
         serialize_fast(
             &mut self.data,
             &DisplayItem {
                 item,
                 clip_and_scroll: *self.clip_stack.last().unwrap(),
                 info,
@@ -1038,103 +1123,103 @@ impl DisplayListBuilder {
         complex_clips: I,
         image_mask: Option<ImageMask>,
         scroll_sensitivity: ScrollSensitivity,
     ) -> ClipId
     where
         I: IntoIterator<Item = ComplexClipRegion>,
         I::IntoIter: ExactSizeIterator,
     {
-        let parent_id = self.clip_stack.last().unwrap().scroll_node_id;
+        let parent = self.clip_stack.last().unwrap().scroll_node_id;
         self.define_scroll_frame_with_parent(
             id,
-            parent_id,
+            parent,
             content_rect,
             clip_rect,
             complex_clips,
             image_mask,
             scroll_sensitivity)
     }
 
     pub fn define_scroll_frame_with_parent<I>(
         &mut self,
         id: Option<ClipId>,
-        parent_id: ClipId,
+        parent: ClipId,
         content_rect: LayoutRect,
         clip_rect: LayoutRect,
         complex_clips: I,
         image_mask: Option<ImageMask>,
         scroll_sensitivity: ScrollSensitivity,
     ) -> ClipId
     where
         I: IntoIterator<Item = ComplexClipRegion>,
         I::IntoIter: ExactSizeIterator,
     {
         let id = self.generate_clip_id(id);
         let item = SpecificDisplayItem::ScrollFrame(ScrollFrameDisplayItem {
             id: id,
-            parent_id: parent_id,
             image_mask: image_mask,
             scroll_sensitivity,
         });
 
         let info = LayoutPrimitiveInfo {
             rect: content_rect,
             local_clip: LocalClip::from(clip_rect),
             is_backface_visible: true,
             tag: None,
         };
 
-        self.push_item(item, &info);
+        let scrollinfo = ClipAndScrollInfo::simple(parent);
+        self.push_item_with_clip_scroll_info(item, &info, scrollinfo);
         self.push_iter(complex_clips);
         id
     }
 
     pub fn define_clip<I>(
         &mut self,
         id: Option<ClipId>,
         clip_rect: LayoutRect,
         complex_clips: I,
         image_mask: Option<ImageMask>,
     ) -> ClipId
     where
         I: IntoIterator<Item = ComplexClipRegion>,
         I::IntoIter: ExactSizeIterator,
     {
-        let parent_id = self.clip_stack.last().unwrap().scroll_node_id;
+        let parent = self.clip_stack.last().unwrap().scroll_node_id;
         self.define_clip_with_parent(
             id,
-            parent_id,
+            parent,
             clip_rect,
             complex_clips,
             image_mask)
     }
 
     pub fn define_clip_with_parent<I>(
         &mut self,
         id: Option<ClipId>,
-        parent_id: ClipId,
+        parent: ClipId,
         clip_rect: LayoutRect,
         complex_clips: I,
         image_mask: Option<ImageMask>,
     ) -> ClipId
     where
         I: IntoIterator<Item = ComplexClipRegion>,
         I::IntoIter: ExactSizeIterator,
     {
         let id = self.generate_clip_id(id);
         let item = SpecificDisplayItem::Clip(ClipDisplayItem {
             id,
-            parent_id: parent_id,
             image_mask: image_mask,
         });
 
         let info = LayoutPrimitiveInfo::new(clip_rect);
 
-        self.push_item(item, &info);
+        let scrollinfo = ClipAndScrollInfo::simple(parent);
+        self.push_item_with_clip_scroll_info(item, &info, scrollinfo);
         self.push_iter(complex_clips);
         id
     }
 
     pub fn define_sticky_frame(
         &mut self,
         id: Option<ClipId>,
         frame_rect: LayoutRect,
@@ -1174,18 +1259,18 @@ impl DisplayListBuilder {
         });
         self.push_item(item, info);
     }
 
     pub fn push_shadow(&mut self, info: &LayoutPrimitiveInfo, shadow: Shadow) {
         self.push_item(SpecificDisplayItem::PushShadow(shadow), info);
     }
 
-    pub fn pop_shadow(&mut self) {
-        self.push_new_empty_item(SpecificDisplayItem::PopShadow);
+    pub fn pop_all_shadows(&mut self) {
+        self.push_new_empty_item(SpecificDisplayItem::PopAllShadows);
     }
 
     pub fn finalize(self) -> (PipelineId, LayoutSize, BuiltDisplayList) {
         assert!(self.save_state.is_none(), "Finalized DisplayListBuilder with a pending save");
 
         let end_time = precise_time_ns();