Bug 1405790 - Update webrender to commit 6440dff485271cdfd24a22c920cea31e01e2b164. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Tue, 10 Oct 2017 08:01:29 -0400
changeset 677249 ec2ec418bb355ccfedf3e3201c08909666420e49
parent 677214 77a4c52e9987d2359969d7c478183b438b464744
child 677250 db1e236ee5a563af7354e34ba81b38211c022402
push id 83735
push user kgupta@mozilla.com
push date Tue, 10 Oct 2017 12:16:32 +0000
reviewers jrmuizel
bugs 1405790
milestone 58.0a1
Bug 1405790 - Update webrender to commit 6440dff485271cdfd24a22c920cea31e01e2b164. r?jrmuizel MozReview-Commit-ID: JRPtTtZ1jNz
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/nested_display_list.rs
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/cs_text_run.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_hardware_composite.fs.glsl
gfx/webrender/res/ps_hardware_composite.glsl
gfx/webrender/res/ps_hardware_composite.vs.glsl
gfx/webrender/res/ps_radial_gradient.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/glyph_cache.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_api/src/font.rs
gfx/webrender_api/src/lib.rs
gfx/webrender_bindings/Cargo.toml
gfx/webrender_bindings/webrender_ffi_generated.h
toolkit/library/gtest/rust/Cargo.lock
toolkit/library/rust/Cargo.lock
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: a884e676449e5b41669cd6de51af14e70cbe3512
+Latest Commit: 6440dff485271cdfd24a22c920cea31e01e2b164
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
 name = "webrender"
-version = "0.52.0"
+version = "0.52.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
deleted file mode 100644
--- a/gfx/webrender/examples/nested_display_list.rs
+++ /dev/null
@@ -1,158 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-extern crate gleam;
-extern crate glutin;
-extern crate webrender;
-
-#[path = "common/boilerplate.rs"]
-mod boilerplate;
-
-use boilerplate::{Example, HandyDandyRectBuilder};
-use webrender::api::*;
-
-struct App {
-    cursor_position: WorldPoint,
-}
-
-impl Example for App {
-    fn render(
-        &mut self,
-        _api: &RenderApi,
-        builder: &mut DisplayListBuilder,
-        _resources: &mut ResourceUpdates,
-        layout_size: LayoutSize,
-        pipeline_id: PipelineId,
-        _document_id: DocumentId,
-    ) {
-        let bounds = LayoutRect::new(LayoutPoint::zero(), layout_size);
-        let info = LayoutPrimitiveInfo::new(bounds);
-        builder.push_stacking_context(
-            &info,
-            ScrollPolicy::Scrollable,
-            None,
-            TransformStyle::Flat,
-            None,
-            MixBlendMode::Normal,
-            Vec::new(),
-        );
-
-        let outer_scroll_frame_rect = (100, 100).to(600, 400);
-        let info = LayoutPrimitiveInfo::new(outer_scroll_frame_rect);
-        builder.push_rect(&info, ColorF::new(1.0, 1.0, 1.0, 1.0));
-
-        let nested_clip_id = builder.define_scroll_frame(
-            None,
-            (100, 100).to(1000, 1000),
-            outer_scroll_frame_rect,
-            vec![],
-            None,
-            ScrollSensitivity::ScriptAndInputEvents,
-        );
-        builder.push_clip_id(nested_clip_id);
-
-        let mut builder2 = DisplayListBuilder::new(pipeline_id, layout_size);
-        let mut builder3 = DisplayListBuilder::new(pipeline_id, layout_size);
-
-        let info = LayoutPrimitiveInfo::new((110, 110).to(210, 210));
-        builder3.push_rect(&info, ColorF::new(0.0, 1.0, 0.0, 1.0));
-
-        // A fixed position rectangle should be fixed to the reference frame that starts
-        // in the outer display list.
-        let info = LayoutPrimitiveInfo::new((220, 110).to(320, 210));
-        builder3.push_stacking_context(
-            &info,
-            ScrollPolicy::Fixed,
-            None,
-            TransformStyle::Flat,
-            None,
-            MixBlendMode::Normal,
-            Vec::new(),
-        );
-
-        let info = LayoutPrimitiveInfo::new((0, 0).to(100, 100));
-        builder3.push_rect(&info, ColorF::new(0.0, 1.0, 0.0, 1.0));
-        builder3.pop_stacking_context();
-
-        // Now we push an inner scroll frame that should have the same id as the outer one,
-        // but the WebRender nested display list replacement code should convert it into
-        // a unique ClipId.
-        let inner_scroll_frame_rect = (330, 110).to(530, 360);
-        let info = LayoutPrimitiveInfo::new(inner_scroll_frame_rect);
-        builder3.push_rect(&info, ColorF::new(1.0, 0.0, 1.0, 0.5));
-        let inner_nested_clip_id = builder3.define_scroll_frame(
-            None,
-            (330, 110).to(2000, 2000),
-            inner_scroll_frame_rect,
-            vec![],
-            None,
-            ScrollSensitivity::ScriptAndInputEvents,
-        );
-        builder3.push_clip_id(inner_nested_clip_id);
-        let info = LayoutPrimitiveInfo::new((340, 120).to(440, 220));
-        builder3.push_rect(&info, ColorF::new(0.0, 1.0, 0.0, 1.0));
-        builder3.pop_clip_id();
-
-        let (_, _, built_list) = builder3.finalize();
-        builder2.push_nested_display_list(&built_list);
-        let (_, _, built_list) = builder2.finalize();
-        builder.push_nested_display_list(&built_list);
-
-        builder.pop_clip_id();
-
-        builder.pop_stacking_context();
-    }
-
-    fn on_event(&mut self, event: glutin::Event, api: &RenderApi, document_id: DocumentId) -> bool {
-        match event {
-            glutin::Event::KeyboardInput(glutin::ElementState::Pressed, _, Some(key)) => {
-                let offset = match key {
-                    glutin::VirtualKeyCode::Down => (0.0, -10.0),
-                    glutin::VirtualKeyCode::Up => (0.0, 10.0),
-                    glutin::VirtualKeyCode::Right => (-10.0, 0.0),
-                    glutin::VirtualKeyCode::Left => (10.0, 0.0),
-                    _ => return false,
-                };
-
-                api.scroll(
-                    document_id,
-                    ScrollLocation::Delta(LayoutVector2D::new(offset.0, offset.1)),
-                    self.cursor_position,
-                    ScrollEventPhase::Start,
-                );
-            }
-            glutin::Event::MouseMoved(x, y) => {
-                self.cursor_position = WorldPoint::new(x as f32, y as f32);
-            }
-            glutin::Event::MouseWheel(delta, _, event_cursor_position) => {
-                if let Some((x, y)) = event_cursor_position {
-                    self.cursor_position = WorldPoint::new(x as f32, y as f32);
-                }
-
-                const LINE_HEIGHT: f32 = 38.0;
-                let (dx, dy) = match delta {
-                    glutin::MouseScrollDelta::LineDelta(dx, dy) => (dx, dy * LINE_HEIGHT),
-                    glutin::MouseScrollDelta::PixelDelta(dx, dy) => (dx, dy),
-                };
-
-                api.scroll(
-                    document_id,
-                    ScrollLocation::Delta(LayoutVector2D::new(dx, dy)),
-                    self.cursor_position,
-                    ScrollEventPhase::Start,
-                );
-            }
-            _ => (),
-        }
-
-        false
-    }
-}
-
-fn main() {
-    let mut app = App {
-        cursor_position: WorldPoint::zero(),
-    };
-    boilerplate::main_wrapper(&mut app, None);
-}
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -22,38 +22,49 @@ ClipRect fetch_clip_rect(ivec2 address) 
     return ClipRect(RectWithSize(data[0].xy, data[0].zw), data[1]);
 }
 
 struct ClipCorner {
     RectWithSize rect;
     vec4 outer_inner_radius;
 };
 
-ClipCorner fetch_clip_corner(ivec2 address, int index) {
-    address += ivec2(2 + 2 * index, 0);
+ClipCorner fetch_clip_corner(ivec2 address) {
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     return ClipCorner(RectWithSize(data[0].xy, data[0].zw), data[1]);
 }
 
 struct ClipData {
     ClipRect rect;
     ClipCorner top_left;
     ClipCorner top_right;
     ClipCorner bottom_left;
     ClipCorner bottom_right;
 };
 
 ClipData fetch_clip(ivec2 address) {
     ClipData clip;
 
     clip.rect = fetch_clip_rect(address);
-    clip.top_left = fetch_clip_corner(address, 0);
-    clip.top_right = fetch_clip_corner(address, 1);
-    clip.bottom_left = fetch_clip_corner(address, 2);
-    clip.bottom_right = fetch_clip_corner(address, 3);
+
+    // Read the corners in groups of two texels, and adjust the read address
+    // before every read.
+    // The address adjustment is done inside this function, and not by passing
+    // the corner index to fetch_clip_corner and computing the correct address
+    // there, because doing so was hitting a driver bug on certain Intel macOS
+    // drivers which creates wrong results when doing arithmetic with integer
+    // variables (under certain, unknown, circumstances).
+    address.x += 2;
+    clip.top_left = fetch_clip_corner(address);
+    address.x += 2;
+    clip.top_right = fetch_clip_corner(address);
+    address.x += 2;
+    clip.bottom_left = fetch_clip_corner(address);
+    address.x += 2;
+    clip.bottom_right = fetch_clip_corner(address);
 
     return clip;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
     Layer layer = fetch_layer(cmi.layer_address);
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -33,17 +33,17 @@ void main(void) {
                               glyph_index,
                               text.subpx_dir);
 
     GlyphResource res = fetch_glyph_resource(resource_address);
 
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
-    vec2 size = res.uv_rect.zw - res.uv_rect.xy;
+    vec2 size = (res.uv_rect.zw - res.uv_rect.xy) * res.scale;
     vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
     vec2 origin = prim.task.render_target_origin +
                   uDevicePixelRatio * (local_pos + shadow.offset - shadow_geom.local_rect.p0);
     vec4 local_rect = vec4(origin, size);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -622,21 +622,22 @@ TransformVertexInfo write_transform_vert
 }
 
 #endif //WR_FEATURE_TRANSFORM
 
 struct GlyphResource {
     vec4 uv_rect;
     float layer;
     vec2 offset;
+    float scale;
 };
 
 GlyphResource fetch_glyph_resource(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
-    return GlyphResource(data[0], data[1].x, data[1].yz);
+    return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
 }
 
 struct ImageResource {
     vec4 uv_rect;
     float layer;
 };
 
 ImageResource fetch_image_resource(int address) {
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -105,20 +105,20 @@ void write_color(vec4 color0, vec4 color
         case SIDE_FIRST:
             color0.a = 0.0;
             break;
         case SIDE_SECOND:
             color1.a = 0.0;
             break;
     }
 
-    vColor00 = vec4(color0.rgb * modulate.x, color0.a);
-    vColor01 = vec4(color0.rgb * modulate.y, color0.a);
-    vColor10 = vec4(color1.rgb * modulate.z, color1.a);
-    vColor11 = vec4(color1.rgb * modulate.w, color1.a);
+    vColor00 = vec4(clamp(color0.rgb * modulate.x, vec3(0.0), vec3(1.0)), color0.a);
+    vColor01 = vec4(clamp(color0.rgb * modulate.y, vec3(0.0), vec3(1.0)), color0.a);
+    vColor10 = vec4(clamp(color1.rgb * modulate.z, vec3(0.0), vec3(1.0)), color1.a);
+    vColor11 = vec4(clamp(color1.rgb * modulate.w, vec3(0.0), vec3(1.0)), color1.a);
 }
 
 int select_style(int color_select, vec2 fstyle) {
     ivec2 style = ivec2(fstyle);
 
     switch (color_select) {
         case SIDE_BOTH:
         {
@@ -320,49 +320,63 @@ void main(void) {
     vec2 local_pos = init_transform_fs(vLocalPos, alpha);
 #else
     vec2 local_pos = vLocalPos;
 #endif
 
     alpha = min(alpha, do_clip());
 
     // Find the appropriate distance to apply the AA smoothstep over.
+    // Using 0.7 instead of 0.5 for the step compensates for the fact that smoothstep
+    // is smooth at its endpoints and has a steeper maximum slope than a linear ramp.
     vec2 fw = fwidth(local_pos);
-    float afwidth = length(fw);
+    float aa_step = 0.7 * length(fw);
+
     float distance_for_color;
     float color_mix_factor;
 
     // Only apply the clip AA if inside the clip region. This is
     // necessary for correctness when the border width is greater
     // than the border radius.
     if (all(lessThan(local_pos * vClipSign, vClipCenter * vClipSign))) {
         vec2 p = local_pos - vClipCenter;
 
+        // The coordinate system is snapped to pixel boundaries. To sample the distance,
+        // however, we are interested in the center of the pixels which introduces an
+        // error of half a pixel towards the exterior of the curve (See issue #1750).
+        // This error is corrected by offsetting the distance by half a device pixel.
+        // This is not entirely correct: it leaves an error that varies between
+        // 0 and (sqrt(2) - 1)/2 = 0.2 pixels but it is hardly noticeable and is better
+        // than the constant sqrt(2)/2 px error without the correction.
+        // To correct this exactly we would need to offset p by half a pixel in the
+        // direction of the center of the ellipse (a different offset for each corner).
+
+        // A half device pixel in css pixels (using the average of width and height in case
+        // there is any kind of transform applied).
+        float half_px = 0.25 * (fw.x + fw.y);
         // Get signed distance from the inner/outer clips.
-        float d0 = distance_to_ellipse(p, vRadii0.xy);
-        float d1 = distance_to_ellipse(p, vRadii0.zw);
-        float d2 = distance_to_ellipse(p, vRadii1.xy);
-        float d3 = distance_to_ellipse(p, vRadii1.zw);
+        float d0 = distance_to_ellipse(p, vRadii0.xy) + half_px;
+        float d1 = distance_to_ellipse(p, vRadii0.zw) + half_px;
+        float d2 = distance_to_ellipse(p, vRadii1.xy) + half_px;
+        float d3 = distance_to_ellipse(p, vRadii1.zw) + half_px;
 
         // SDF subtract main radii
-        float d_main = max(d0, 0.5 * afwidth - d1);
+        float d_main = max(d0, aa_step - d1);
 
         // SDF subtract inner radii (double style borders)
-        float d_inner = max(d2 - 0.5 * afwidth, -d3);
+        float d_inner = max(d2 - aa_step, -d3);
 
         // Select how to combine the SDF based on border style.
         float d = mix(max(d_main, -d_inner), d_main, vSDFSelect);
 
         // Only apply AA to fragments outside the signed distance field.
-        alpha = min(alpha, 1.0 - smoothstep(0.0, 0.5 * afwidth, d));
+        alpha = min(alpha, 1.0 - smoothstep(0.0, aa_step, d));
 
         // Get the groove/ridge mix factor.
-        color_mix_factor = smoothstep(-0.5 * afwidth,
-                                      0.5 * afwidth,
-                                      -d2);
+        color_mix_factor = smoothstep(-aa_step, aa_step, -d2);
     } else {
         // Handle the case where the fragment is outside the clip
         // region in a corner. This occurs when border width is
         // greater than border radius.
 
         // Get linear distances along horizontal and vertical edges.
         vec2 d0 = vClipSign.xx * (local_pos.xx - vEdgeDistance.xz);
         vec2 d1 = vClipSign.yy * (local_pos.yy - vEdgeDistance.yw);
@@ -384,14 +398,14 @@ void main(void) {
 
     // Mix inner/outer color.
     vec4 color0 = mix(vColor00, vColor01, color_mix_factor);
     vec4 color1 = mix(vColor10, vColor11, color_mix_factor);
 
     // Select color based on side of line. Get distance from the
     // reference line, and then apply AA along the edge.
     float ld = distance_to_line(vColorEdgeLine.xy, vColorEdgeLine.zw, local_pos);
-    float m = smoothstep(-0.5 * afwidth, 0.5 * afwidth, ld);
+    float m = smoothstep(-aa_step, aa_step, ld);
     vec4 color = mix(color0, color1, m);
 
     oFragColor = color * vec4(1.0, 1.0, 1.0, alpha);
 }
 #endif
deleted file mode 100644
--- a/gfx/webrender/res/ps_hardware_composite.fs.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
-    oFragColor = texture(sCacheRGBA8, vec3(uv, vUv.z));
-}
--- a/gfx/webrender/res/ps_hardware_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.glsl
@@ -1,8 +1,39 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared
 
 varying vec3 vUv;
 flat varying vec4 vUvBounds;
+
+#ifdef WR_VERTEX_SHADER
+void main(void) {
+    CompositeInstance ci = fetch_composite_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
+
+    vec2 dest_origin = dest_task.render_target_origin -
+                       dest_task.screen_space_origin +
+                       vec2(ci.user_data0, ci.user_data1);
+
+    vec2 local_pos = mix(dest_origin,
+                         dest_origin + src_task.size,
+                         aPosition.xy);
+
+    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 st0 = src_task.render_target_origin;
+    vec2 st1 = src_task.render_target_origin + src_task.size;
+    vUv = vec3(mix(st0, st1, aPosition.xy) / texture_size, src_task.render_target_layer_index);
+    vUvBounds = vec4(st0 + 0.5, st1 - 0.5) / texture_size.xyxy;
+
+    gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
+    oFragColor = texture(sCacheRGBA8, vec3(uv, vUv.z));
+}
+#endif
deleted file mode 100644
--- a/gfx/webrender/res/ps_hardware_composite.vs.glsl
+++ /dev/null
@@ -1,25 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    CompositeInstance ci = fetch_composite_instance();
-    AlphaBatchTask dest_task = fetch_alpha_batch_task(ci.render_task_index);
-    AlphaBatchTask src_task = fetch_alpha_batch_task(ci.src_task_index);
-
-    vec2 dest_origin = dest_task.render_target_origin -
-                       dest_task.screen_space_origin +
-                       vec2(ci.user_data0, ci.user_data1);
-
-    vec2 local_pos = mix(dest_origin,
-                         dest_origin + src_task.size,
-                         aPosition.xy);
-
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
-    vec2 st0 = src_task.render_target_origin;
-    vec2 st1 = src_task.render_target_origin + src_task.size;
-    vUv = vec3(mix(st0, st1, aPosition.xy) / texture_size, src_task.render_target_layer_index);
-    vUvBounds = vec4(st0 + 0.5, st1 - 0.5) / texture_size.xyxy;
-
-    gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
-}
--- a/gfx/webrender/res/ps_radial_gradient.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.glsl
@@ -48,16 +48,18 @@ void main(void) {
     vEndCenter.y *= ratio_xy;
     vTileSize.y *= ratio_xy;
     vTileRepeat.y *= ratio_xy;
 
     vGradientAddress = prim.specific_prim_address + VECS_PER_GRADIENT;
 
     // Whether to repeat the gradient instead of clamping.
     vGradientRepeat = float(int(gradient.start_end_radius_ratio_xy_extend_mode.w) != EXTEND_MODE_CLAMP);
+
+    write_clip(vi.screen_pos, prim.clip_area);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     vec2 pos = mod(vPos, vTileRepeat);
 
     if (pos.x >= vTileSize.x ||
@@ -99,13 +101,26 @@ void main(void) {
             offset = t0;
         } else if (vStartRadius + rd * t1 >= 0.0) {
             offset = t1;
         } else {
             discard;
         }
     }
 
-    oFragColor = sample_gradient(vGradientAddress,
+    vec4 color = sample_gradient(vGradientAddress,
                                  offset,
                                  vGradientRepeat);
+
+    // Un-premultiply the color from sampling the gradient.
+    if (color.a > 0.0) {
+        color.rgb /= color.a;
+
+        // Apply the clip mask
+        color.a = min(color.a, do_clip());
+
+        // Pre-multiply the result.
+        color.rgb *= color.a;
+    }
+
+    oFragColor = color;
 }
 #endif
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -25,17 +25,17 @@ void main(void) {
                               text.subpx_dir);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
     vec2 local_pos = glyph.offset +
                      text.offset +
                      vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
 
     RectWithSize local_rect = RectWithSize(local_pos,
-                                           (res.uv_rect.zw - res.uv_rect.xy) / uDevicePixelRatio);
+                                           (res.uv_rect.zw - res.uv_rect.xy) * res.scale / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
                                                     local_rect);
@@ -52,32 +52,33 @@ void main(void) {
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
-    vColor = text.color;
+    vColor = vec4(text.color.rgb * text.color.a, text.color.a);
     vUv = vec3(mix(st0, st1, f), res.layer);
     vUvBorder = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     vec3 tc = vec3(clamp(vUv.xy, vUvBorder.xy, vUvBorder.zw), vUv.z);
 #ifdef WR_FEATURE_SUBPIXEL_AA
     //note: the blend mode is not compatible with clipping
     oFragColor = texture(sColor0, tc);
 #else
     vec4 color = texture(sColor0, tc) * vColor;
+    float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     float a = 0.0;
     init_transform_fs(vLocalPos, a);
-    color.a *= a;
+    alpha *= a;
 #endif
-    color.a = min(color.a, do_clip());
-    oFragColor = color;
+    alpha = min(alpha, do_clip());
+    oFragColor = color * alpha;
 #endif
 }
 #endif
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -178,16 +178,21 @@ pub fn build_shader_strings(
     // Construct a list of strings to be passed to the shader compiler.
     let mut vs_source = String::new();
     let mut fs_source = String::new();
 
     // GLSL requires that the version number comes first.
     vs_source.push_str(gl_version_string);
     fs_source.push_str(gl_version_string);
 
+    // Insert the shader name to make debugging easier.
+    let name_string = format!("// {}\n", base_filename);
+    vs_source.push_str(&name_string);
+    fs_source.push_str(&name_string);
+
     // Define a constant depending on whether we are compiling VS or FS.
     vs_source.push_str(SHADER_KIND_VERTEX);
     fs_source.push_str(SHADER_KIND_FRAGMENT);
 
     // Add any defines that were passed by the caller.
     vs_source.push_str(features);
     fs_source.push_str(features);
 
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -26,147 +26,39 @@ pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF {
     r: 0.3,
     g: 0.3,
     b: 0.3,
     a: 0.6,
 };
 
-/// Nested display lists cause two types of replacements to ClipIds inside the nesting:
-///     1. References to the root scroll frame are replaced by the ClipIds that
-///        contained the nested display list.
-///     2. Other ClipIds (that aren't custom or reference frames) are assumed to be
-///        local to the nested display list and are converted to an id that is unique
-///        outside of the nested display list as well.
-///
-/// This structure keeps track of what ids are the "root" for one particular level of
-/// nesting as well as keeping and index, which can make ClipIds used internally unique
-/// in the full ClipScrollTree.
-#[derive(Debug)]
-struct NestedDisplayListInfo {
-    /// The index of this nested display list, which is used to generate
-    /// new ClipIds for clips that are defined inside it.
-    nest_index: u64,
-
-    /// The ClipId of the scroll frame node which contains this nested
-    /// display list. This is used to replace references to the root with
-    /// the proper ClipId.
-    scroll_node_id: ClipId,
-
-    /// The ClipId of the clip node which contains this nested display list.
-    /// This is used to replace references to the root with the proper ClipId.
-    clip_node_id: ClipId,
-}
-
-impl NestedDisplayListInfo {
-    fn convert_id_to_nested(&self, id: &ClipId) -> ClipId {
-        match *id {
-            ClipId::Clip(id, _, pipeline_id) => ClipId::Clip(id, self.nest_index, pipeline_id),
-            _ => *id,
-        }
-    }
-
-    fn convert_scroll_id_to_nested(&self, id: &ClipId) -> ClipId {
-        if id.pipeline_id() != self.scroll_node_id.pipeline_id() {
-            return *id;
-        }
-
-        if id.is_root_scroll_node() {
-            self.scroll_node_id
-        } else {
-            self.convert_id_to_nested(id)
-        }
-    }
-
-    fn convert_clip_id_to_nested(&self, id: &ClipId) -> ClipId {
-        if id.pipeline_id() != self.clip_node_id.pipeline_id() {
-            return *id;
-        }
-
-        if id.is_root_scroll_node() {
-            self.clip_node_id
-        } else {
-            self.convert_id_to_nested(id)
-        }
-    }
-
-    fn convert_new_id_to_nested(&self, id: &ClipId) -> ClipId {
-        if id.pipeline_id() != self.clip_node_id.pipeline_id() {
-            return *id;
-        }
-        self.convert_id_to_nested(id)
-    }
-}
-
 struct FlattenContext<'a> {
     scene: &'a Scene,
     builder: &'a mut FrameBuilder,
     resource_cache: &'a ResourceCache,
     tiled_image_map: TiledImageMap,
     replacements: Vec<(ClipId, ClipId)>,
-    nested_display_list_info: Vec<NestedDisplayListInfo>,
-    current_nested_display_list_index: u64,
 }
 
 impl<'a> FlattenContext<'a> {
     fn new(
         scene: &'a Scene,
         builder: &'a mut FrameBuilder,
         resource_cache: &'a ResourceCache,
     ) -> FlattenContext<'a> {
         FlattenContext {
             scene,
             builder,
             resource_cache,
             tiled_image_map: resource_cache.get_tiled_image_map(),
             replacements: Vec::new(),
-            nested_display_list_info: Vec::new(),
-            current_nested_display_list_index: 0,
         }
     }
 
-    fn push_nested_display_list_ids(&mut self, info: ClipAndScrollInfo) {
-        self.current_nested_display_list_index += 1;
-        self.nested_display_list_info.push(NestedDisplayListInfo {
-            nest_index: self.current_nested_display_list_index,
-            scroll_node_id: info.scroll_node_id,
-            clip_node_id: info.clip_node_id(),
-        });
-    }
-
-    fn pop_nested_display_list_ids(&mut self) {
-        self.nested_display_list_info.pop();
-    }
-
-    fn convert_new_id_to_nested(&self, id: &ClipId) -> ClipId {
-        if let Some(nested_info) = self.nested_display_list_info.last() {
-            nested_info.convert_new_id_to_nested(id)
-        } else {
-            *id
-        }
-    }
-
-    fn convert_clip_scroll_info_to_nested(&self, info: &mut ClipAndScrollInfo) {
-        if let Some(nested_info) = self.nested_display_list_info.last() {
-            info.scroll_node_id = nested_info.convert_scroll_id_to_nested(&info.scroll_node_id);
-            info.clip_node_id = info.clip_node_id
-                .map(|ref id| nested_info.convert_clip_id_to_nested(id));
-        }
-
-        // We only want to produce nested ClipIds if we are in a nested display
-        // list situation.
-        debug_assert!(
-            !info.scroll_node_id.is_nested() || !self.nested_display_list_info.is_empty()
-        );
-        debug_assert!(
-            !info.clip_node_id().is_nested() || !self.nested_display_list_info.is_empty()
-        );
-    }
-
     /// Since WebRender still handles fixed position and reference frame content internally
     /// we need to apply this table of id replacements only to the id that affects the
     /// position of a node. We can eventually remove this when clients start handling
     /// reference frames themselves. This method applies these replacements.
     fn apply_scroll_frame_id_replacement(&self, id: ClipId) -> ClipId {
         match self.replacements.last() {
             Some(&(to_replace, replacement)) if to_replace == id => replacement,
             _ => id,
@@ -332,19 +224,18 @@ impl Frame {
     fn flatten_clip<'a>(
         &mut self,
         context: &mut FlattenContext,
         pipeline_id: PipelineId,
         parent_id: &ClipId,
         new_clip_id: &ClipId,
         clip_region: ClipRegion,
     ) {
-        let new_clip_id = context.convert_new_id_to_nested(new_clip_id);
         context.builder.add_clip_node(
-            new_clip_id,
+            *new_clip_id,
             *parent_id,
             pipeline_id,
             clip_region,
             &mut self.clip_scroll_tree,
         );
     }
 
     fn flatten_scroll_frame<'a>(
@@ -362,19 +253,18 @@ impl Frame {
         context.builder.add_clip_node(
             clip_id,
             *parent_id,
             pipeline_id,
             clip_region,
             &mut self.clip_scroll_tree,
         );
 
-        let new_scroll_frame_id = context.convert_new_id_to_nested(new_scroll_frame_id);
         context.builder.add_scroll_frame(
-            new_scroll_frame_id,
+            *new_scroll_frame_id,
             clip_id,
             pipeline_id,
             &frame_rect,
             &content_rect.size,
             scroll_sensitivity,
             &mut self.clip_scroll_tree,
         );
     }
@@ -550,17 +440,16 @@ impl Frame {
     fn flatten_item<'a, 'b>(
         &mut self,
         item: DisplayItemRef<'a, 'b>,
         pipeline_id: PipelineId,
         context: &mut FlattenContext,
         reference_frame_relative_offset: LayerVector2D,
     ) -> Option<BuiltDisplayListIter<'a>> {
         let mut clip_and_scroll = item.clip_and_scroll();
-        context.convert_clip_scroll_info_to_nested(&mut clip_and_scroll);
 
         let unreplaced_scroll_id = clip_and_scroll.scroll_node_id;
         clip_and_scroll.scroll_node_id =
             context.apply_scroll_frame_id_replacement(clip_and_scroll.scroll_node_id);
 
         let prim_info = item.get_layer_primitive_info(&reference_frame_relative_offset);
         match *item.item() {
             SpecificDisplayItem::Image(ref info) => {
@@ -772,32 +661,23 @@ impl Frame {
                     &frame_rect,
                     &content_rect,
                     clip_region,
                     info.scroll_sensitivity,
                 );
             }
             SpecificDisplayItem::StickyFrame(ref info) => {
                 let frame_rect = item.rect().translate(&reference_frame_relative_offset);
-                let new_clip_id = context.convert_new_id_to_nested(&info.id);
                 self.clip_scroll_tree.add_sticky_frame(
-                    new_clip_id,
+                    info.id,
                     clip_and_scroll.scroll_node_id, /* parent id */
                     frame_rect,
                     info.sticky_frame_info,
                 );
             }
-            SpecificDisplayItem::PushNestedDisplayList => {
-                // Using the clip and scroll already processed for nesting here
-                // means that in the case of multiple nested display lists, we
-                // will enter the outermost ids into the table and avoid having
-                // to do a replacement for every level of nesting.
-                context.push_nested_display_list_ids(clip_and_scroll);
-            }
-            SpecificDisplayItem::PopNestedDisplayList => context.pop_nested_display_list_ids(),
 
             // Do nothing; these are dummy items for the display list parser
             SpecificDisplayItem::SetGradientStops => {}
 
             SpecificDisplayItem::PopStackingContext => {
                 unreachable!("Should have returned in parent method.")
             }
             SpecificDisplayItem::PushShadow(shadow) => {
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -5,33 +5,34 @@
 use api::{BorderDetails, BorderDisplayItem, BorderRadius, BoxShadowClipMode, BuiltDisplayList};
 use api::{ClipAndScrollInfo, ClipId, ColorF};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExtendMode, FIND_ALL, FilterOp, FontInstance, FontRenderMode};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, POINT_RELATIVE_TO_PIPELINE_VIEWPORT, PipelineId, RepeatMode};
-use api::{ScrollSensitivity, SubpixelDirection, Shadow, TileOffset, TransformStyle};
+use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
 use api::{WorldPixel, WorldPoint, YuvColorSpace, YuvData, device_length};
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipMode, ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use euclid::{SideOffsets2D, vec2, vec3};
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
+use picture::PicturePrimitive;
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{BoxShadowPrimitiveCpu, TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
-use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu, ShadowPrimitiveCpu};
+use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{AlphaRenderItem, ClipWorkItem, RenderTask};
 use render_task::{RenderTaskId, RenderTaskLocation, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::ScenePipeline;
 use std::{mem, usize, f32, i32};
 use tiling::{ClipScrollGroup, ClipScrollGroupIndex, CompositeOps, Frame};
 use tiling::{ContextIsolation, StackingContextIndex};
@@ -579,49 +580,46 @@ impl FrameBuilder {
     }
 
     pub fn push_shadow(
         &mut self,
         shadow: Shadow,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
     ) {
-        let prim = ShadowPrimitiveCpu {
-            shadow,
-            primitives: Vec::new(),
-            render_task_id: None,
-        };
+        let prim = PicturePrimitive::new_shadow(shadow);
 
         // Create an empty shadow primitive. Insert it into
         // the draw lists immediately so that it will be drawn
         // before any visual text elements that are added as
         // part of this shadow context.
         let prim_index = self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
-            PrimitiveContainer::Shadow(prim),
+            PrimitiveContainer::Picture(prim),
         );
 
         self.shadow_prim_stack.push(prim_index);
     }
 
     pub fn pop_shadow(&mut self) {
         let prim_index = self.shadow_prim_stack
             .pop()
             .expect("invalid shadow push/pop count");
 
         // By now, the local rect of the text shadow has been calculated. It
         // is calculated as the items in the shadow are added. It's now
         // safe to offset the local rect by the offset of the shadow, which
         // is then used when blitting the shadow to the final location.
         let metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
-        let prim = &self.prim_store.cpu_shadows[metadata.cpu_prim_index.0];
+        let prim = &self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
+        let shadow = prim.as_shadow();
 
-        metadata.local_rect = metadata.local_rect.translate(&prim.shadow.offset);
+        metadata.local_rect = metadata.local_rect.translate(&shadow.offset);
     }
 
     pub fn add_solid_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         color: &ColorF,
         flags: PrimitiveFlags,
@@ -681,19 +679,20 @@ impl FrameBuilder {
             color: *color,
             style: style,
             orientation: orientation,
         };
 
         let mut fast_shadow_prims = Vec::new();
         for shadow_prim_index in &self.shadow_prim_stack {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
-            let shadow_prim = &self.prim_store.cpu_shadows[shadow_metadata.cpu_prim_index.0];
-            if shadow_prim.shadow.blur_radius == 0.0 {
-                fast_shadow_prims.push(shadow_prim.shadow);
+            let picture = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            let shadow = picture.as_shadow();
+            if shadow.blur_radius == 0.0 {
+                fast_shadow_prims.push(shadow.clone());
             }
         }
         for shadow in fast_shadow_prims {
             let mut line = line.clone();
             line.color = shadow.color;
             let mut info = info.clone();
             info.rect = new_rect.translate(&shadow.offset);
             self.add_primitive(
@@ -715,28 +714,29 @@ impl FrameBuilder {
 
         if color.a > 0.0 {
             self.add_primitive_to_hit_testing_list(&info, clip_and_scroll);
             self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
         }
 
         for shadow_prim_index in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
-            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Shadow);
-            let shadow_prim =
-                &mut self.prim_store.cpu_shadows[shadow_metadata.cpu_prim_index.0];
+            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
+            let picture =
+                &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            let blur_radius = picture.as_shadow().blur_radius;
 
             // Only run real blurs here (fast path zero blurs are handled above).
-            if shadow_prim.shadow.blur_radius > 0.0 {
+            if blur_radius > 0.0 {
                 let shadow_rect = new_rect.inflate(
-                    shadow_prim.shadow.blur_radius,
-                    shadow_prim.shadow.blur_radius,
+                    blur_radius,
+                    blur_radius,
                 );
                 shadow_metadata.local_rect = shadow_metadata.local_rect.union(&shadow_rect);
-                shadow_prim.primitives.push(prim_index);
+                picture.add_primitive(prim_index, clip_and_scroll);
             }
         }
     }
 
     pub fn add_border(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
@@ -1127,109 +1127,85 @@ impl FrameBuilder {
         // by the azure renderer.
         if font.size >= Au::from_px(4096) {
             return;
         }
 
         // TODO(gw): Use a proper algorithm to select
         // whether this item should be rendered with
         // subpixel AA!
-        let mut default_render_mode = self.config
+        let mut render_mode = self.config
             .default_font_render_mode
             .limit_by(font.render_mode);
         if let Some(options) = glyph_options {
-            default_render_mode = default_render_mode.limit_by(options.render_mode);
+            render_mode = render_mode.limit_by(options.render_mode);
         }
 
         // There are some conditions under which we can't use
         // subpixel text rendering, even if enabled.
-        let mut normal_render_mode = default_render_mode;
-        if normal_render_mode == FontRenderMode::Subpixel {
+        if render_mode == FontRenderMode::Subpixel {
             if color.a != 1.0 {
-                normal_render_mode = FontRenderMode::Alpha;
+                render_mode = FontRenderMode::Alpha;
             }
 
             // text on a stacking context that has filters
             // (e.g. opacity) can't use sub-pixel.
             // TODO(gw): It's possible we can relax this in
             //           the future, if we modify the way
             //           we handle subpixel blending.
             if let Some(sc_index) = self.stacking_context_stack.last() {
                 let stacking_context = &self.stacking_context_store[sc_index.0];
                 if stacking_context.composite_ops.count() > 0 {
-                    normal_render_mode = FontRenderMode::Alpha;
+                    render_mode = FontRenderMode::Alpha;
                 }
             }
         }
 
-        let color = match font.render_mode {
-            FontRenderMode::Bitmap => ColorF::new(1.0, 1.0, 1.0, 1.0),
-            FontRenderMode::Subpixel |
-            FontRenderMode::Alpha |
-            FontRenderMode::Mono => *color,
-        };
-
-        // Shadows never use subpixel AA, but need to respect the alpha/mono flag
-        // for reftests.
-        let (shadow_render_mode, subpx_dir) = match default_render_mode {
-            FontRenderMode::Subpixel | FontRenderMode::Alpha => {
-                // TODO(gw): Expose subpixel direction in API once WR supports
-                //           vertical text runs.
-                (FontRenderMode::Alpha, font.subpx_dir)
-            }
-            FontRenderMode::Mono => (FontRenderMode::Mono, SubpixelDirection::None),
-            FontRenderMode::Bitmap => (FontRenderMode::Bitmap, font.subpx_dir),
-        };
-
         let prim_font = FontInstance::new(
             font.font_key,
             font.size,
-            color,
-            normal_render_mode,
-            subpx_dir,
+            *color,
+            render_mode,
+            font.subpx_dir,
             font.platform_options,
             font.variations.clone(),
             font.synthetic_italics,
         );
         let prim = TextRunPrimitiveCpu {
             font: prim_font,
             glyph_range,
             glyph_count,
             glyph_gpu_blocks: Vec::new(),
             glyph_keys: Vec::new(),
-            shadow_render_mode,
             offset: run_offset,
-            color: color,
         };
 
         // Text shadows that have a blur radius of 0 need to be rendered as normal
         // text elements to get pixel perfect results for reftests. It's also a big
         // performance win to avoid blurs and render target allocations where
         // possible. For any text shadows that have zero blur, create a normal text
         // primitive with the shadow's color and offset. These need to be added
         // *before* the visual text primitive in order to get the correct paint
         // order. Store them in a Vec first to work around borrowck issues.
         // TODO(gw): Refactor to avoid having to store them in a Vec first.
         let mut fast_shadow_prims = Vec::new();
         for shadow_prim_index in &self.shadow_prim_stack {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
-            let shadow_prim = &self.prim_store.cpu_shadows[shadow_metadata.cpu_prim_index.0];
-            if shadow_prim.shadow.blur_radius == 0.0 {
+            let picture_prim = &self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
+            let shadow = picture_prim.as_shadow();
+            if shadow.blur_radius == 0.0 {
                 let mut text_prim = prim.clone();
-                if font.render_mode != FontRenderMode::Bitmap {
-                    text_prim.font.color = shadow_prim.shadow.color.into();
-                }
+                text_prim.font.color = shadow.color.into();
                 // If we have translucent text, we need to ensure it won't go
                 // through the subpixel blend mode, which doesn't work with
                 // traditional alpha blending.
-                if shadow_prim.shadow.color.a != 1.0 {
+                if shadow.color.a != 1.0 {
                     text_prim.font.render_mode = text_prim.font.render_mode.limit_by(FontRenderMode::Alpha);
                 }
-                text_prim.color = shadow_prim.shadow.color;
-                text_prim.offset += shadow_prim.shadow.offset;
+                text_prim.offset += shadow.offset;
                 fast_shadow_prims.push(text_prim);
             }
         }
         for text_prim in fast_shadow_prims {
             let rect = info.rect;
             let mut info = info.clone();
             info.rect = rect.translate(&text_prim.offset);
             self.add_primitive(
@@ -1259,28 +1235,29 @@ impl FrameBuilder {
         // primitives. Although we're adding the indices *after* the visual
         // primitive here, they will still draw before the visual text, since
         // the shadow primitive itself has been added to the draw cmd
         // list *before* the visual element, during push_shadow. We need
         // the primitive index of the visual element here before we can add
         // the indices as sub-primitives to the shadow primitives.
         for shadow_prim_index in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
-            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Shadow);
-            let shadow_prim =
-                &mut self.prim_store.cpu_shadows[shadow_metadata.cpu_prim_index.0];
+            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
+            let picture_prim =
+                &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
 
             // Only run real blurs here (fast path zero blurs are handled above).
-            if shadow_prim.shadow.blur_radius > 0.0 {
+            let blur_radius = picture_prim.as_shadow().blur_radius;
+            if blur_radius > 0.0 {
                 let shadow_rect = rect.inflate(
-                    shadow_prim.shadow.blur_radius,
-                    shadow_prim.shadow.blur_radius,
+                    blur_radius,
+                    blur_radius,
                 );
                 shadow_metadata.local_rect = shadow_metadata.local_rect.union(&shadow_rect);
-                shadow_prim.primitives.push(prim_index);
+                picture_prim.add_primitive(prim_index, clip_and_scroll);
             }
         }
     }
 
     pub fn fill_box_shadow_rect(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
--- a/gfx/webrender/src/glyph_cache.rs
+++ b/gfx/webrender/src/glyph_cache.rs
@@ -8,16 +8,17 @@ use resource_cache::ResourceClassCache;
 use std::sync::Arc;
 use texture_cache::TextureCacheHandle;
 
 pub struct CachedGlyphInfo {
     pub texture_cache_handle: TextureCacheHandle,
     pub glyph_bytes: Arc<Vec<u8>>,
     pub size: DeviceUintSize,
     pub offset: DevicePoint,
+    pub scale: f32,
 }
 
 pub type GlyphKeyCache = ResourceClassCache<GlyphKey, Option<CachedGlyphInfo>>;
 
 pub struct GlyphCache {
     pub glyph_key_caches: FastHashMap<FontInstance, GlyphKeyCache>,
 }
 
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #[cfg(test)]
-use api::{ColorF, FontRenderMode, IdNamespace, LayoutPoint, SubpixelDirection};
+use api::{ColorF, IdNamespace, LayoutPoint};
 use api::{DevicePoint, DeviceUintSize, FontInstance};
-use api::{FontKey, FontTemplate};
-use api::{GlyphDimensions, GlyphKey};
+use api::{FontKey, FontTemplate, FontRenderMode, ColorU};
+use api::{GlyphDimensions, GlyphKey, SubpixelDirection};
 use api::{ImageData, ImageDescriptor, ImageFormat};
 #[cfg(test)]
 use app_units::Au;
 use device::TextureFilter;
 use glyph_cache::{CachedGlyphInfo, GlyphCache};
 use gpu_cache::GpuCache;
 use internal_types::FastHashSet;
 use platform::font::{FontContext, RasterizedGlyph};
@@ -139,16 +139,39 @@ impl GlyphRasterizer {
                 .add_font(&font_key, &template);
         }
     }
 
     pub fn delete_font(&mut self, font_key: FontKey) {
         self.fonts_to_remove.push(font_key);
     }
 
+    pub fn prepare_font(&self, font: &mut FontInstance) {
+        // In alpha/mono mode, the color of the font is irrelevant.
+        // Forcing it to white in those cases saves rasterizing glyphs
+        // of different colors when not needed.
+        match font.render_mode {
+            FontRenderMode::Mono | FontRenderMode::Bitmap => {
+                font.color = ColorU::new(255, 255, 255, 255);
+                // Subpixel positioning is disabled in mono and bitmap modes.
+                font.subpx_dir = SubpixelDirection::None;
+            }
+            FontRenderMode::Alpha => {
+                font.color = ColorU::new(255, 255, 255, 255);
+            }
+            FontRenderMode::Subpixel => {
+                // In subpixel mode, we only actually need the color if preblending
+                // is used in the font backend.
+                if !FontContext::has_gamma_correct_subpixel_aa() {
+                    font.color = ColorU::new(255, 255, 255, 255);
+                }
+            }
+        }
+    }
+
     pub fn request_glyphs(
         &mut self,
         glyph_cache: &mut GlyphCache,
         font: FontInstance,
         glyph_keys: &[GlyphKey],
         texture_cache: &mut TextureCache,
         gpu_cache: &mut GpuCache,
     ) {
@@ -178,17 +201,17 @@ impl GlyphRasterizer {
                                     height: glyph_info.size.height,
                                     stride: None,
                                     format: ImageFormat::BGRA8,
                                     is_opaque: false,
                                     offset: 0,
                                 },
                                 TextureFilter::Linear,
                                 ImageData::Raw(glyph_info.glyph_bytes.clone()),
-                                [glyph_info.offset.x, glyph_info.offset.y],
+                                [glyph_info.offset.x, glyph_info.offset.y, glyph_info.scale],
                                 None,
                                 gpu_cache,
                             );
                         }
                     }
                 }
                 Entry::Vacant(..) => {
                     let request = GlyphRequest::new(&font, key);
@@ -241,20 +264,20 @@ impl GlyphRasterizer {
         font: &FontInstance,
         glyph_key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         self.font_contexts
             .lock_shared_context()
             .get_glyph_dimensions(font, glyph_key)
     }
 
-    pub fn is_bitmap_font(&self, font_key: FontKey) -> bool {
+    pub fn is_bitmap_font(&self, font: &FontInstance) -> bool {
         self.font_contexts
             .lock_shared_context()
-            .is_bitmap_font(font_key)
+            .is_bitmap_font(font)
     }
 
     pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
         self.font_contexts
             .lock_shared_context()
             .get_glyph_index(font_key, ch)
     }
 
@@ -307,25 +330,26 @@ impl GlyphRasterizer {
                             height: glyph.height,
                             stride: None,
                             format: ImageFormat::BGRA8,
                             is_opaque: false,
                             offset: 0,
                         },
                         TextureFilter::Linear,
                         ImageData::Raw(glyph_bytes.clone()),
-                        [glyph.left, glyph.top],
+                        [glyph.left, glyph.top, glyph.scale],
                         None,
                         gpu_cache,
                     );
                     Some(CachedGlyphInfo {
                         texture_cache_handle,
                         glyph_bytes,
                         size: DeviceUintSize::new(glyph.width, glyph.height),
                         offset: DevicePoint::new(glyph.left, glyph.top),
+                        scale: glyph.scale,
                     })
                 } else {
                     None
                 });
 
             let glyph_key_cache = glyph_cache.get_glyph_key_cache_for_font_mut(job.request.font);
 
             glyph_key_cache.insert(job.request.key, glyph_info);
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -64,16 +64,17 @@ mod frame;
 mod frame_builder;
 mod freelist;
 mod geometry;
 mod glyph_cache;
 mod glyph_rasterizer;
 mod gpu_cache;
 mod gpu_types;
 mod internal_types;
+mod picture;
 mod prim_store;
 mod print_tree;
 mod profiler;
 mod record;
 mod render_backend;
 mod render_task;
 mod renderer;
 mod resource_cache;
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/picture.rs
@@ -0,0 +1,79 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ClipAndScrollInfo, Shadow};
+use prim_store::PrimitiveIndex;
+use render_task::RenderTaskId;
+
+/*
+ A picture represents a dynamically rendered image. It consists of:
+
+ * A number of primitives that are drawn onto the picture.
+ * A composite operation describing how to composite this
+   picture into its parent.
+ * A configuration describing how to draw the primitives on
+   this picture (e.g. in screen space or local space).
+ */
+
+#[derive(Clone, Debug)]
+pub struct PrimitiveRun {
+    pub prim_index: PrimitiveIndex,
+    pub count: usize,
+    pub clip_and_scroll: ClipAndScrollInfo,
+}
+
+#[derive(Debug)]
+pub enum CompositeOp {
+    Shadow(Shadow),
+
+    // TODO(gw): Support other composite ops, such
+    //           as blur, blend etc.
+}
+
+#[derive(Debug)]
+pub struct PicturePrimitive {
+    pub prim_runs: Vec<PrimitiveRun>,
+    pub composite_op: CompositeOp,
+    pub render_task_id: Option<RenderTaskId>,
+
+    // TODO(gw): Add a mode that specifies if this
+    //           picture should be rasterized in
+    //           screen-space or local-space.
+}
+
+impl PicturePrimitive {
+    pub fn new_shadow(shadow: Shadow) -> PicturePrimitive {
+        PicturePrimitive {
+            prim_runs: Vec::new(),
+            composite_op: CompositeOp::Shadow(shadow),
+            render_task_id: None,
+        }
+    }
+
+    pub fn as_shadow(&self) -> &Shadow {
+        match self.composite_op {
+            CompositeOp::Shadow(ref shadow) => shadow,
+        }
+    }
+
+    pub fn add_primitive(
+        &mut self,
+        prim_index: PrimitiveIndex,
+        clip_and_scroll: ClipAndScrollInfo
+    ) {
+        if let Some(ref mut run) = self.prim_runs.last_mut() {
+            if run.clip_and_scroll == clip_and_scroll &&
+               run.prim_index.0 + run.count == prim_index.0 {
+                run.count += 1;
+                return;
+            }
+        }
+
+        self.prim_runs.push(PrimitiveRun {
+            prim_index,
+            count: 1,
+            clip_and_scroll,
+        });
+    }
+}
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -37,26 +37,28 @@ pub struct FontContext {
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
 pub struct RasterizedGlyph {
     pub top: f32,
     pub left: f32,
     pub width: u32,
     pub height: u32,
+    pub scale: f32,
     pub bytes: Vec<u8>,
 }
 
 impl RasterizedGlyph {
     pub fn blank() -> RasterizedGlyph {
         RasterizedGlyph {
             top: 0.0,
             left: 0.0,
             width: 0,
             height: 0,
+            scale: 1.0,
             bytes: vec![],
         }
     }
 }
 
 struct GlyphMetrics {
     rasterized_left: i32,
     rasterized_descent: i32,
@@ -417,28 +419,30 @@ impl FontContext {
                 let r = pixel[2];
                 let a = pixel[3];
                 print!("({}, {}, {}, {}) ", r, g, b, a);
             }
             println!("");
         }
     }
 
-    pub fn is_bitmap_font(&mut self, font_key: FontKey) -> bool {
-        match self.get_ct_font(font_key, Au(16 * 60), &[]) {
+    pub fn is_bitmap_font(&mut self, font: &FontInstance) -> bool {
+        match self.get_ct_font(font.font_key, font.size, &font.variations) {
             Some(ref ct_font) => {
                 let traits = ct_font.symbolic_traits();
                 (traits & kCTFontColorGlyphsTrait) != 0
             }
-            None => {
-                false
-            }
+            None => false,
         }
     }
 
+    pub fn has_gamma_correct_subpixel_aa() -> bool {
+        true
+    }
+
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
         let ct_font = match self.get_ct_font(font.font_key, font.size, &font.variations) {
             Some(font) => font,
             None => return Some(RasterizedGlyph::blank()),
@@ -580,12 +584,13 @@ impl FontContext {
             );
         }
 
         Some(RasterizedGlyph {
             left: metrics.rasterized_left as f32,
             top: metrics.rasterized_ascent as f32,
             width: metrics.rasterized_width,
             height: metrics.rasterized_height,
+            scale: 1.0,
             bytes: rasterized_pixels,
         })
     }
 }
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -1,36 +1,40 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstance, FontKey, FontRenderMode, GlyphDimensions};
-use api::{NativeFontHandle, SubpixelDirection};
-use api::GlyphKey;
+use api::{FontInstancePlatformOptions, FontLCDFilter, FontHinting};
+use api::{NativeFontHandle, SubpixelDirection, GlyphKey};
+use api::{FONT_FORCE_AUTOHINT, FONT_NO_AUTOHINT, FONT_EMBEDDED_BITMAP};
+use api::{FONT_EMBOLDEN, FONT_VERTICAL_LAYOUT, FONT_SUBPIXEL_BGR};
 use freetype::freetype::{FT_BBox, FT_Outline_Translate, FT_Pixel_Mode, FT_Render_Mode};
 use freetype::freetype::{FT_Done_Face, FT_Error, FT_Get_Char_Index, FT_Int32};
 use freetype::freetype::{FT_Done_FreeType, FT_Library_SetLcdFilter, FT_Pos};
 use freetype::freetype::{FT_F26Dot6, FT_Face, FT_Glyph_Format, FT_Long, FT_UInt};
-use freetype::freetype::{FT_GlyphSlot, FT_LcdFilter, FT_New_Memory_Face, FT_Outline_Transform};
+use freetype::freetype::{FT_GlyphSlot, FT_LcdFilter, FT_New_Memory_Face};
 use freetype::freetype::{FT_Init_FreeType, FT_Load_Glyph, FT_Render_Glyph};
-use freetype::freetype::{FT_Library, FT_Matrix, FT_Outline_Get_CBox, FT_Set_Char_Size};
+use freetype::freetype::{FT_Library, FT_Outline_Get_CBox, FT_Set_Char_Size, FT_Select_Size};
+use freetype::freetype::{FT_LOAD_COLOR, FT_LOAD_DEFAULT, FT_LOAD_FORCE_AUTOHINT};
+use freetype::freetype::{FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH, FT_LOAD_NO_AUTOHINT};
+use freetype::freetype::{FT_LOAD_NO_BITMAP, FT_LOAD_NO_HINTING, FT_LOAD_VERTICAL_LAYOUT};
+use freetype::freetype::{FT_FACE_FLAG_SCALABLE, FT_FACE_FLAG_FIXED_SIZES, FT_Err_Cannot_Render_Glyph};
 use internal_types::FastHashMap;
-use std::{mem, ptr, slice};
+use std::{cmp, mem, ptr, slice};
 use std::sync::Arc;
 
-// This constant is not present in the freetype
+// These constants are not present in the freetype
 // bindings due to bindgen not handling the way
-// the macro is defined.
-const FT_LOAD_TARGET_LIGHT: FT_Int32 = 1 << 16;
-
-// Default to slight hinting, which is what most
-// Linux distros use by default, and is a better
-// default than no hinting.
-// TODO(gw): Make this configurable.
-const GLYPH_LOAD_FLAGS: FT_Int32 = FT_LOAD_TARGET_LIGHT;
+// the macros are defined.
+//const FT_LOAD_TARGET_NORMAL: FT_UInt = 0 << 16;
+const FT_LOAD_TARGET_LIGHT: FT_UInt  = 1 << 16;
+const FT_LOAD_TARGET_MONO: FT_UInt   = 2 << 16;
+const FT_LOAD_TARGET_LCD: FT_UInt    = 3 << 16;
+const FT_LOAD_TARGET_LCD_V: FT_UInt  = 4 << 16;
 
 struct Face {
     face: FT_Face,
     // Raw byte data has to live until the font is deleted, according to
     // https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#FT_New_Memory_Face
     _bytes: Arc<Vec<u8>>,
 }
 
@@ -45,51 +49,49 @@ pub struct FontContext {
 // a given FontContext so it is safe to move the latter between threads.
 unsafe impl Send for FontContext {}
 
 pub struct RasterizedGlyph {
     pub top: f32,
     pub left: f32,
     pub width: u32,
     pub height: u32,
+    pub scale: f32,
     pub bytes: Vec<u8>,
 }
 
-const SUCCESS: FT_Error = FT_Error(0);
+extern "C" {
+    fn FT_GlyphSlot_Embolden(slot: FT_GlyphSlot);
+    fn FT_GlyphSlot_Oblique(slot: FT_GlyphSlot);
+}
 
 impl FontContext {
     pub fn new() -> FontContext {
         let mut lib: FT_Library = ptr::null_mut();
 
-        // Per Skia, using a filter adds one full pixel to each side.
-        let mut lcd_extra_pixels = 1;
+        // Using an LCD filter may add one full pixel to each side if support is built in.
+        // As of FreeType 2.8.1, an LCD filter is always used regardless of settings
+        // if support for the patent-encumbered LCD filter algorithms is not built in.
+        // Thus, the only reasonable way to guess padding is to unconditionally add it if
+        // subpixel AA is used.
+        let lcd_extra_pixels = 1;
 
         unsafe {
             let result = FT_Init_FreeType(&mut lib);
             assert!(
                 result.succeeded(),
                 "Unable to initialize FreeType library {:?}",
                 result
             );
-
-            // TODO(gw): Check result of this to determine if freetype build supports subpixel.
-            let result = FT_Library_SetLcdFilter(lib, FT_LcdFilter::FT_LCD_FILTER_DEFAULT);
-
-            if !result.succeeded() {
-                println!(
-                    "WARN: Initializing a FreeType library build without subpixel AA enabled!"
-                );
-                lcd_extra_pixels = 0;
-            }
         }
 
         FontContext {
             lib,
             faces: FastHashMap::default(),
-            lcd_extra_pixels: lcd_extra_pixels,
+            lcd_extra_pixels,
         }
     }
 
     pub fn has_font(&self, font_key: &FontKey) -> bool {
         self.faces.contains_key(font_key)
     }
 
     pub fn add_raw_font(&mut self, font_key: &FontKey, bytes: Arc<Vec<u8>>, index: u32) {
@@ -127,35 +129,81 @@ impl FontContext {
             let result = unsafe { FT_Done_Face(face.face) };
             assert!(result.succeeded());
         }
     }
 
     fn load_glyph(&self, font: &FontInstance, glyph: &GlyphKey) -> Option<FT_GlyphSlot> {
         debug_assert!(self.faces.contains_key(&font.font_key));
         let face = self.faces.get(&font.font_key).unwrap();
-        let char_size = font.size.to_f64_px() * 64.0 + 0.5;
+
+        let mut load_flags = FT_LOAD_DEFAULT;
+        let FontInstancePlatformOptions { flags, hinting, .. } = font.platform_options.unwrap_or_default();
+        match (hinting, font.render_mode) {
+            (FontHinting::None, _) => load_flags |= FT_LOAD_NO_HINTING,
+            (FontHinting::Mono, _) => load_flags = FT_LOAD_TARGET_MONO,
+            (FontHinting::Light, _) => load_flags = FT_LOAD_TARGET_LIGHT,
+            (FontHinting::LCD, FontRenderMode::Subpixel) => {
+                load_flags = match font.subpx_dir {
+                    SubpixelDirection::Vertical => FT_LOAD_TARGET_LCD_V,
+                    _ => FT_LOAD_TARGET_LCD,
+                };
+                if (flags & FONT_FORCE_AUTOHINT) != 0 {
+                    load_flags |= FT_LOAD_FORCE_AUTOHINT;
+                }
+            }
+            _ => {
+                if (flags & FONT_FORCE_AUTOHINT) != 0 {
+                    load_flags |= FT_LOAD_FORCE_AUTOHINT;
+                }
+            }
+        }
 
-        assert_eq!(SUCCESS, unsafe {
-            FT_Set_Char_Size(face.face, char_size as FT_F26Dot6, 0, 0, 0)
-        });
+        if (flags & FONT_NO_AUTOHINT) != 0 {
+            load_flags |= FT_LOAD_NO_AUTOHINT;
+        }
+        if (flags & FONT_EMBEDDED_BITMAP) == 0 {
+            load_flags |= FT_LOAD_NO_BITMAP;
+        }
+        if (flags & FONT_VERTICAL_LAYOUT) != 0 {
+            load_flags |= FT_LOAD_VERTICAL_LAYOUT;
+        }
+
+        load_flags |= FT_LOAD_COLOR;
+        load_flags |= FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH;
 
-        let result = unsafe { FT_Load_Glyph(face.face, glyph.index as FT_UInt, GLYPH_LOAD_FLAGS) };
+        let mut result = if font.render_mode == FontRenderMode::Bitmap {
+            if (load_flags & FT_LOAD_NO_BITMAP) != 0 {
+                FT_Error(FT_Err_Cannot_Render_Glyph as i32)
+            } else {
+                self.choose_bitmap_size(face.face, font.size.to_f64_px())
+            }
+        } else {
+            let char_size = font.size.to_f64_px() * 64.0 + 0.5;
+            unsafe { FT_Set_Char_Size(face.face, char_size as FT_F26Dot6, 0, 0, 0) }
+        };
 
-        if result == SUCCESS {
+        if result.succeeded() {
+            result = unsafe { FT_Load_Glyph(face.face, glyph.index as FT_UInt, load_flags as FT_Int32) };
+        };
+
+        if result.succeeded() {
             let slot = unsafe { (*face.face).glyph };
             assert!(slot != ptr::null_mut());
 
-            // TODO(gw): We use the FT_Outline_* APIs to manage sub-pixel offsets.
-            //           We will need a custom code path for bitmap fonts (which
-            //           are very rare).
-            match unsafe { (*slot).format } {
-                FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE => Some(slot),
+            if (flags & FONT_EMBOLDEN) != 0 {
+                unsafe { FT_GlyphSlot_Embolden(slot) };
+            }
+
+            let format = unsafe { (*slot).format };
+            match format {
+                FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE |
+                FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => Some(slot),
                 _ => {
-                    error!("TODO: Support bitmap fonts!");
+                    error!("Unsupported {:?}", format);
                     None
                 }
             }
         } else {
             error!(
                 "Unable to load glyph for {} of size {:?} from font {:?}, {:?}",
                 glyph.index,
                 font.size,
@@ -173,44 +221,47 @@ impl FontContext {
         font: &FontInstance,
         glyph: &GlyphKey,
     ) -> FT_BBox {
         let mut cbox: FT_BBox = unsafe { mem::uninitialized() };
 
         // Get the estimated bounding box from FT (control points).
         unsafe {
             FT_Outline_Get_CBox(&(*slot).outline, &mut cbox);
+
+            // For spaces and other non-printable characters, early out.
+            if (*slot).outline.n_contours == 0 {
+                return cbox;
+            }
         }
 
         // Convert the subpixel offset to floats.
         let (dx, dy) = font.get_subpx_offset(glyph);
 
         // Apply extra pixel of padding for subpixel AA, due to the filter.
         let padding = match font.render_mode {
-            FontRenderMode::Subpixel => self.lcd_extra_pixels * 64,
+            FontRenderMode::Subpixel => (self.lcd_extra_pixels * 64) as FT_Pos,
             FontRenderMode::Alpha |
             FontRenderMode::Mono |
-            FontRenderMode::Bitmap => 0,
+            FontRenderMode::Bitmap => 0 as FT_Pos,
         };
-        cbox.xMin -= padding as FT_Pos;
-        cbox.xMax += padding as FT_Pos;
 
         // Offset the bounding box by subpixel positioning.
         // Convert to 26.6 fixed point format for FT.
         match font.subpx_dir {
             SubpixelDirection::None => {}
             SubpixelDirection::Horizontal => {
                 let dx = (dx * 64.0 + 0.5) as FT_Long;
-                cbox.xMin += dx;
-                cbox.xMax += dx;
+                cbox.xMin += dx - padding;
+                cbox.xMax += dx + padding;
             }
             SubpixelDirection::Vertical => {
                 let dy = (dy * 64.0 + 0.5) as FT_Long;
-                cbox.yMin += dy;
-                cbox.yMax += dy;
+                cbox.yMin += dy - padding;
+                cbox.yMax += dy + padding;
             }
         }
 
         // Outset the box to device pixel boundaries
         cbox.xMin &= !63;
         cbox.yMin &= !63;
         cbox.xMax = (cbox.xMax + 63) & !63;
         cbox.yMax = (cbox.yMax + 63) & !63;
@@ -218,33 +269,68 @@ impl FontContext {
         cbox
     }
 
     fn get_glyph_dimensions_impl(
         &self,
         slot: FT_GlyphSlot,
         font: &FontInstance,
         glyph: &GlyphKey,
+        scale_bitmaps: bool,
     ) -> Option<GlyphDimensions> {
         let metrics = unsafe { &(*slot).metrics };
 
         // If there's no advance, no need to consider this glyph
         // for layout.
         if metrics.horiAdvance == 0 {
-            None
-        } else {
-            let cbox = self.get_bounding_box(slot, font, glyph);
+            return None
+        }
 
-            Some(GlyphDimensions {
-                left: (cbox.xMin >> 6) as i32,
-                top: (cbox.yMax >> 6) as i32,
-                width: ((cbox.xMax - cbox.xMin) >> 6) as u32,
-                height: ((cbox.yMax - cbox.yMin) >> 6) as u32,
-                advance: metrics.horiAdvance as f32 / 64.0,
-            })
+        let advance = metrics.horiAdvance as f32 / 64.0;
+        match unsafe { (*slot).format } {
+            FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => {
+                let left = unsafe { (*slot).bitmap_left };
+                let top = unsafe { (*slot).bitmap_top };
+                let width = unsafe { (*slot).bitmap.width };
+                let height = unsafe { (*slot).bitmap.rows };
+                if scale_bitmaps {
+                    let y_size = unsafe { (*(*(*slot).face).size).metrics.y_ppem };
+                    let scale = font.size.to_f32_px() / y_size as f32;
+                    let x0 = left as f32 * scale;
+                    let x1 = width as f32 * scale + x0;
+                    let y1 = top as f32 * scale;
+                    let y0 = y1 - height as f32 * scale;
+                    Some(GlyphDimensions {
+                        left: x0.round() as i32,
+                        top: y1.round() as i32,
+                        width: (x1.ceil() - x0.floor()) as u32,
+                        height: (y1.ceil() - y0.floor()) as u32,
+                        advance: advance * scale,
+                    })
+                } else {
+                    Some(GlyphDimensions {
+                        left,
+                        top,
+                        width,
+                        height,
+                        advance,
+                    })
+                }
+            }
+            FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE => {
+                let cbox = self.get_bounding_box(slot, font, glyph);
+                Some(GlyphDimensions {
+                    left: (cbox.xMin >> 6) as i32,
+                    top: (cbox.yMax >> 6) as i32,
+                    width: ((cbox.xMax - cbox.xMin) >> 6) as u32,
+                    height: ((cbox.yMax - cbox.yMin) >> 6) as u32,
+                    advance,
+                })
+            }
+            _ => None,
         }
     }
 
     pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
         let face = self.faces.get(&font_key).expect("Unknown font key!");
         unsafe {
             let idx = FT_Get_Char_Index(face.face, ch as _);
             if idx != 0 {
@@ -256,52 +342,64 @@ impl FontContext {
     }
 
     pub fn get_glyph_dimensions(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         let slot = self.load_glyph(font, key);
-        slot.and_then(|slot| self.get_glyph_dimensions_impl(slot, font, key))
+        slot.and_then(|slot| self.get_glyph_dimensions_impl(slot, font, key, true))
+    }
+
+    pub fn is_bitmap_font(&mut self, font: &FontInstance) -> bool {
+        debug_assert!(self.faces.contains_key(&font.font_key));
+        let face = self.faces.get(&font.font_key).unwrap();
+        let face_flags = unsafe { (*face.face).face_flags };
+        // If the face has embedded bitmaps, they should only be used if either
+        // embedded bitmaps are explicitly requested or if the face has no outline.
+        if (face_flags & (FT_FACE_FLAG_FIXED_SIZES as FT_Long)) != 0 {
+            let FontInstancePlatformOptions { flags, .. } = font.platform_options.unwrap_or_default();
+            if (flags & FONT_EMBEDDED_BITMAP) != 0 {
+                return true;
+            }
+            (face_flags & (FT_FACE_FLAG_SCALABLE as FT_Long)) == 0
+        } else {
+            false
+        }
     }
 
-    pub fn is_bitmap_font(&mut self, _font_key: FontKey) -> bool {
-        // TODO(gw): Support bitmap fonts in Freetype.
+    fn choose_bitmap_size(&self, face: FT_Face, requested_size: f64) -> FT_Error {
+        let mut best_dist = unsafe { *(*face).available_sizes.offset(0) }.y_ppem as f64 / 64.0 - requested_size;
+        let mut best_size = 0;
+        let num_fixed_sizes = unsafe { (*face).num_fixed_sizes };
+        for i in 1 .. num_fixed_sizes {
+            // Distance is positive if strike is larger than desired size,
+            // or negative if smaller. If a smaller strike was previously found,
+            // then prefer a larger strike. Otherwise, minimize distance.
+            let dist = unsafe { *(*face).available_sizes.offset(i as isize) }.y_ppem as f64 / 64.0 - requested_size;
+            if (best_dist < 0.0 && dist >= best_dist) || dist.abs() <= best_dist {
+                best_dist = dist;
+                best_size = i;
+            }
+        }
+        unsafe { FT_Select_Size(face, best_size) }
+    }
+
+    pub fn has_gamma_correct_subpixel_aa() -> bool {
+        // We don't do any preblending with FreeType currently, so the color is not used.
         false
     }
 
-    pub fn rasterize_glyph(
+    fn rasterize_glyph_outline(
         &mut self,
+        slot: FT_GlyphSlot,
         font: &FontInstance,
         key: &GlyphKey,
-    ) -> Option<RasterizedGlyph> {
-        let slot = match self.load_glyph(font, key) {
-            Some(slot) => slot,
-            None => return None,
-        };
-
-        let render_mode = match font.render_mode {
-            FontRenderMode::Mono => FT_Render_Mode::FT_RENDER_MODE_MONO,
-            FontRenderMode::Alpha => FT_Render_Mode::FT_RENDER_MODE_NORMAL,
-            FontRenderMode::Subpixel => FT_Render_Mode::FT_RENDER_MODE_LCD,
-            FontRenderMode::Bitmap => FT_Render_Mode::FT_RENDER_MODE_NORMAL,
-        };
-
-        // Get dimensions of the glyph, to see if we need to rasterize it.
-        let dimensions = match self.get_glyph_dimensions_impl(slot, font, key) {
-            Some(val) => val,
-            None => return None,
-        };
-
-        // For spaces and other non-printable characters, early out.
-        if dimensions.width == 0 || dimensions.height == 0 {
-            return None;
-        }
-
+    ) -> bool {
         // Get the subpixel offsets in FT 26.6 format.
         let (dx, dy) = font.get_subpx_offset(key);
         let dx = (dx * 64.0 + 0.5) as FT_Long;
         let dy = (dy * 64.0 + 0.5) as FT_Long;
 
         // Move the outline curves to be at the origin, taking
         // into account the subpixel positioning.
         unsafe {
@@ -310,103 +408,214 @@ impl FontContext {
             FT_Outline_Get_CBox(outline, &mut cbox);
             FT_Outline_Translate(
                 outline,
                 dx - ((cbox.xMin + dx) & !63),
                 dy - ((cbox.yMin + dy) & !63),
             );
 
             if font.synthetic_italics {
-                // These magic numbers are pre-encoded fixed point
-                // values that apply ~12 degree shear. Borrowed
-                // from the Freetype implementation of the
-                // FT_GlyphSlot_Oblique function.
-                let transform = FT_Matrix {
-                    xx: 0x10000,
-                    yx: 0x00000,
-                    xy: 0x0366A,
-                    yy: 0x10000,
-                };
-                FT_Outline_Transform(outline, &transform);
+                FT_GlyphSlot_Oblique(slot);
             }
         }
 
+        if font.render_mode == FontRenderMode::Subpixel {
+            let FontInstancePlatformOptions { lcd_filter, .. } = font.platform_options.unwrap_or_default();
+            let filter = match lcd_filter {
+                FontLCDFilter::None => FT_LcdFilter::FT_LCD_FILTER_NONE,
+                FontLCDFilter::Default => FT_LcdFilter::FT_LCD_FILTER_DEFAULT,
+                FontLCDFilter::Light => FT_LcdFilter::FT_LCD_FILTER_LIGHT,
+                FontLCDFilter::Legacy => FT_LcdFilter::FT_LCD_FILTER_LEGACY,
+            };
+            unsafe { FT_Library_SetLcdFilter(self.lib, filter) };
+        }
+        let render_mode = match (font.render_mode, font.subpx_dir) {
+            (FontRenderMode::Mono, _) => FT_Render_Mode::FT_RENDER_MODE_MONO,
+            (FontRenderMode::Alpha, _) | (FontRenderMode::Bitmap, _) => FT_Render_Mode::FT_RENDER_MODE_NORMAL,
+            (FontRenderMode::Subpixel, SubpixelDirection::Vertical) => FT_Render_Mode::FT_RENDER_MODE_LCD_V,
+            (FontRenderMode::Subpixel, _) => FT_Render_Mode::FT_RENDER_MODE_LCD,
+        };
         let result = unsafe { FT_Render_Glyph(slot, render_mode) };
-        if result != SUCCESS {
+        if !result.succeeded() {
             error!(
                 "Unable to rasterize {:?} with {:?}, {:?}",
                 key,
                 render_mode,
                 result
             );
+            false
+        } else {
+            true
+        }
+    }
+
+    pub fn rasterize_glyph(
+        &mut self,
+        font: &FontInstance,
+        key: &GlyphKey,
+    ) -> Option<RasterizedGlyph> {
+        let slot = match self.load_glyph(font, key) {
+            Some(slot) => slot,
+            None => return None,
+        };
+
+        // Get dimensions of the glyph, to see if we need to rasterize it.
+        let dimensions = match self.get_glyph_dimensions_impl(slot, font, key, false) {
+            Some(val) => val,
+            None => return None,
+        };
+
+        // For spaces and other non-printable characters, early out.
+        if dimensions.width == 0 || dimensions.height == 0 {
             return None;
         }
 
+        let format = unsafe { (*slot).format };
+        let mut scale = 1.0;
+        match format {
+            FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => {
+                let y_size = unsafe { (*(*(*slot).face).size).metrics.y_ppem };
+                scale = font.size.to_f32_px() / y_size as f32;
+            }
+            FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE => {
+                if !self.rasterize_glyph_outline(slot, font, key) {
+                    return None;
+                }
+            }
+            _ => {
+                error!("Unsupported {:?}", format);
+                return None;
+            }
+        }
+
         let bitmap = unsafe { &(*slot).bitmap };
         let pixel_mode = unsafe { mem::transmute(bitmap.pixel_mode as u32) };
         info!(
             "Rasterizing {:?} as {:?} with dimensions {:?}",
             key,
-            render_mode,
+            font.render_mode,
             dimensions
         );
 
-        let actual_width = match pixel_mode {
+        let (actual_width, actual_height) = match pixel_mode {
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
                 assert!(bitmap.width % 3 == 0);
-                bitmap.width / 3
+                ((bitmap.width / 3) as i32, bitmap.rows as i32)
             }
-            FT_Pixel_Mode::FT_PIXEL_MODE_MONO | FT_Pixel_Mode::FT_PIXEL_MODE_GRAY => bitmap.width,
-            _ => {
-                panic!("Unexpected pixel mode!");
+            FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V => {
+                assert!(bitmap.rows % 3 == 0);
+                (bitmap.width as i32, (bitmap.rows / 3) as i32)
             }
-        } as i32;
-
-        let actual_height = bitmap.rows as i32;
-        let top = unsafe { (*slot).bitmap_top };
-        let left = unsafe { (*slot).bitmap_left };
+            FT_Pixel_Mode::FT_PIXEL_MODE_MONO | FT_Pixel_Mode::FT_PIXEL_MODE_GRAY | FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
+                (bitmap.width as i32, bitmap.rows as i32)
+            }
+            _ => panic!("Unsupported {:?}", pixel_mode),
+        };
+        let (left, top) = unsafe { ((*slot).bitmap_left, (*slot).bitmap_top) };
         let mut final_buffer = vec![0; (actual_width * actual_height * 4) as usize];
 
         // Extract the final glyph from FT format into RGBA8 format, which is
         // what WR expects.
-        for y in 0 .. actual_height {
-            // Get pointer to the bytes for this row.
-            let mut src = unsafe { bitmap.buffer.offset((y * bitmap.pitch) as isize) };
-
-            for x in 0 .. actual_width {
-                let value = match pixel_mode {
-                    FT_Pixel_Mode::FT_PIXEL_MODE_MONO => {
-                        let mask = 0x80 >> (x & 0x7);
-                        let byte = unsafe { *src.offset((x >> 3) as isize) };
-                        let alpha = if byte & mask != 0 { 0xff } else { 0 };
-                        [0xff, 0xff, 0xff, alpha]
+        let FontInstancePlatformOptions { flags, .. } = font.platform_options.unwrap_or_default();
+        let subpixel_bgr = (flags & FONT_SUBPIXEL_BGR) != 0;
+        let mut src_row = bitmap.buffer;
+        let mut dest: usize = 0;
+        while dest < final_buffer.len() {
+            let mut src = src_row;
+            let row_end = dest + actual_width as usize * 4;
+            match pixel_mode {
+                FT_Pixel_Mode::FT_PIXEL_MODE_MONO => {
+                    while dest < row_end {
+                        // Cast the byte to signed so that we can left shift each bit into
+                        // the top bit, then right shift to fill out the bits with 0s or 1s.
+                        let mut byte: i8 = unsafe { *src as i8 };
+                        src = unsafe { src.offset(1) };
+                        let byte_end = cmp::min(row_end, dest + 8 * 4);
+                        while dest < byte_end {
+                            let alpha = (byte >> 7) as u8;
+                            final_buffer[dest + 0] = alpha;
+                            final_buffer[dest + 1] = alpha;
+                            final_buffer[dest + 2] = alpha;
+                            final_buffer[dest + 3] = alpha;
+                            dest += 4;
+                            byte <<= 1;
+                        }
                     }
-                    FT_Pixel_Mode::FT_PIXEL_MODE_GRAY => {
+                }
+                FT_Pixel_Mode::FT_PIXEL_MODE_GRAY => {
+                    while dest < row_end {
                         let alpha = unsafe { *src };
+                        final_buffer[dest + 0] = alpha;
+                        final_buffer[dest + 1] = alpha;
+                        final_buffer[dest + 2] = alpha;
+                        final_buffer[dest + 3] = alpha;
                         src = unsafe { src.offset(1) };
-                        [0xff, 0xff, 0xff, alpha]
+                        dest += 4;
+                    }
+                }
+                FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
+                    if subpixel_bgr {
+                        while dest < row_end {
+                            final_buffer[dest + 0] = unsafe { *src };
+                            final_buffer[dest + 1] = unsafe { *src.offset(1) };
+                            final_buffer[dest + 2] = unsafe { *src.offset(2) };
+                            final_buffer[dest + 3] = 0xff;
+                            src = unsafe { src.offset(3) };
+                            dest += 4;
+                        }
+                    } else {
+                        while dest < row_end {
+                            final_buffer[dest + 2] = unsafe { *src };
+                            final_buffer[dest + 1] = unsafe { *src.offset(1) };
+                            final_buffer[dest + 0] = unsafe { *src.offset(2) };
+                            final_buffer[dest + 3] = 0xff;
+                            src = unsafe { src.offset(3) };
+                            dest += 4;
+                        }
                     }
-                    FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
-                        let t = unsafe { slice::from_raw_parts(src, 3) };
-                        src = unsafe { src.offset(3) };
-                        [t[2], t[1], t[0], 0xff]
+                }
+                FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V => {
+                    if subpixel_bgr {
+                        while dest < row_end {
+                            final_buffer[dest + 0] = unsafe { *src };
+                            final_buffer[dest + 1] = unsafe { *src.offset(bitmap.pitch as isize) };
+                            final_buffer[dest + 2] = unsafe { *src.offset((2 * bitmap.pitch) as isize) };
+                            final_buffer[dest + 3] = 0xff;
+                            src = unsafe { src.offset(1) };
+                            dest += 4;
+                        }
+                    } else {
+                        while dest < row_end {
+                            final_buffer[dest + 2] = unsafe { *src };
+                            final_buffer[dest + 1] = unsafe { *src.offset(bitmap.pitch as isize) };
+                            final_buffer[dest + 0] = unsafe { *src.offset((2 * bitmap.pitch) as isize) };
+                            final_buffer[dest + 3] = 0xff;
+                            src = unsafe { src.offset(1) };
+                            dest += 4;
+                        }
                     }
-                    _ => panic!("Unsupported {:?}", pixel_mode),
-                };
-                let i = 4 * (y * actual_width + x) as usize;
-                let dest = &mut final_buffer[i .. i + 4];
-                dest.clone_from_slice(&value);
+                    src_row = unsafe { src_row.offset((2 * bitmap.pitch) as isize) };
+                }
+                FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
+                    // The source is premultiplied BGRA data.
+                    let dest_slice = &mut final_buffer[dest .. row_end];
+                    let src_slice = unsafe { slice::from_raw_parts(src, dest_slice.len()) };
+                    dest_slice.copy_from_slice(src_slice);
+                }
+                _ => panic!("Unsupported {:?}", pixel_mode),
             }
+            src_row = unsafe { src_row.offset(bitmap.pitch as isize) };
         }
 
         Some(RasterizedGlyph {
-            left: (dimensions.left + left) as f32,
-            top: (dimensions.top + top - actual_height) as f32,
+            left: ((dimensions.left + left) as f32 * scale).round(),
+            top: ((dimensions.top + top - actual_height) as f32 * scale).round(),
             width: actual_width as u32,
             height: actual_height as u32,
+            scale,
             bytes: final_buffer,
         })
     }
 }
 
 impl Drop for FontContext {
     fn drop(&mut self) {
         unsafe {
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -28,16 +28,17 @@ pub struct FontContext {
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
 pub struct RasterizedGlyph {
     pub top: f32,
     pub left: f32,
     pub width: u32,
     pub height: u32,
+    pub scale: f32,
     pub bytes: Vec<u8>,
 }
 
 fn dwrite_texture_type(render_mode: FontRenderMode) -> dwrote::DWRITE_TEXTURE_TYPE {
     match render_mode {
         FontRenderMode::Mono | FontRenderMode::Bitmap => dwrote::DWRITE_TEXTURE_ALIASED_1x1,
         FontRenderMode::Alpha | FontRenderMode::Subpixel => dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1,
     }
@@ -299,21 +300,25 @@ impl FontContext {
                     rgba_pixels[i * 4 + 2] = pixels[i * 3 + 2];
                     rgba_pixels[i * 4 + 3] = 0xff;
                 }
                 rgba_pixels
             }
         }
     }
 
-    pub fn is_bitmap_font(&mut self, _font_key: FontKey) -> bool {
+    pub fn is_bitmap_font(&mut self, _font: &FontInstance) -> bool {
         // TODO(gw): Support bitmap fonts in DWrite.
         false
     }
 
+    pub fn has_gamma_correct_subpixel_aa() -> bool {
+        true
+    }
+
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
         let analysis = self.create_glyph_analysis(font, key);
         let texture_type = dwrite_texture_type(font.render_mode);
 
@@ -324,37 +329,41 @@ impl FontContext {
         // Alpha texture bounds can sometimes return an empty rect
         // Such as for spaces
         if width == 0 || height == 0 {
             return None;
         }
 
         let mut pixels = analysis.create_alpha_texture(texture_type, bounds);
 
-        if font.render_mode != FontRenderMode::Mono {
-            let lut_correction = match font.platform_options {
-                Some(option) => if option.force_gdi_rendering {
-                    &self.gdi_gamma_lut
-                } else {
-                    &self.gamma_lut
-                },
-                None => &self.gamma_lut,
-            };
+        match font.render_mode {
+            FontRenderMode::Mono | FontRenderMode::Bitmap => {}
+            FontRenderMode::Alpha | FontRenderMode::Subpixel => {
+                let lut_correction = match font.platform_options {
+                    Some(option) => if option.force_gdi_rendering {
+                        &self.gdi_gamma_lut
+                    } else {
+                        &self.gamma_lut
+                    },
+                    None => &self.gamma_lut,
+                };
 
-            lut_correction.preblend_rgb(
-                &mut pixels,
-                width,
-                height,
-                ColorLut::new(font.color.r, font.color.g, font.color.b, font.color.a),
-            );
+                lut_correction.preblend_rgb(
+                    &mut pixels,
+                    width,
+                    height,
+                    ColorLut::new(font.color.r, font.color.g, font.color.b, font.color.a),
+                );
+            }
         }
 
         let rgba_pixels = self.convert_to_rgba(&mut pixels, font.render_mode);
 
         Some(RasterizedGlyph {
             left: bounds.left as f32,
             top: -bounds.top as f32,
             width: width as u32,
             height: height as u32,
+            scale: 1.0,
             bytes: rgba_pixels,
         })
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,24 +1,25 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect, DeviceIntSize};
 use api::{DevicePoint, ExtendMode, FontInstance, FontRenderMode, GlyphInstance, GlyphKey};
 use api::{GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerRect};
-use api::{LayerSize, LayerVector2D, LineOrientation, LineStyle, Shadow};
+use api::{LayerSize, LayerVector2D, LineOrientation, LineStyle};
 use api::{TileOffset, YuvColorSpace, YuvFormat, device_length};
 use app_units::Au;
 use border::BorderCornerInstance;
 use clip::{ClipMode, ClipSourcesHandle, ClipStore, Geometry};
 use euclid::Size2D;
 use frame_builder::PrimitiveContext;
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
+use picture::PicturePrimitive;
 use render_task::{ClipWorkItem, RenderTask, RenderTaskId, RenderTaskTree};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use resource_cache::{ImageProperties, ResourceCache};
 use std::{mem, usize};
 use util::{MatrixHelpers, pack_as_float, recycle_vec, TransformedRect};
 
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
@@ -105,18 +106,18 @@ pub enum PrimitiveKind {
     TextRun,
     Image,
     YuvImage,
     Border,
     AlignedGradient,
     AngleGradient,
     RadialGradient,
     BoxShadow,
-    Shadow,
     Line,
+    Picture,
 }
 
 impl GpuCacheHandle {
     pub fn as_int(&self, gpu_cache: &GpuCache) -> i32 {
         gpu_cache.get_address(self).as_int()
     }
 }
 
@@ -509,77 +510,74 @@ impl RadialGradientPrimitiveCpu {
     ) {
         request.extend_from_slice(&self.gpu_blocks);
 
         let gradient_builder = GradientGpuBlockBuilder::new(self.stops_range, display_list);
         gradient_builder.build(false, &mut request);
     }
 }
 
-#[derive(Debug)]
-pub struct ShadowPrimitiveCpu {
-    pub shadow: Shadow,
-    pub primitives: Vec<PrimitiveIndex>,
-    pub render_task_id: Option<RenderTaskId>,
-}
-
 #[derive(Debug, Clone)]
 pub struct TextRunPrimitiveCpu {
     pub font: FontInstance,
     pub offset: LayerVector2D,
     pub glyph_range: ItemRange<GlyphInstance>,
     pub glyph_count: usize,
     pub glyph_keys: Vec<GlyphKey>,
     pub glyph_gpu_blocks: Vec<GpuBlockData>,
-    pub shadow_render_mode: FontRenderMode,
-    pub color: ColorF,
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum TextRunMode {
     Normal,
     Shadow,
 }
 
 impl TextRunPrimitiveCpu {
+    pub fn get_font(&self,
+                    run_mode: TextRunMode,
+                    device_pixel_ratio: f32,
+    ) -> FontInstance {
+        let mut font = self.font.clone();
+        match run_mode {
+            TextRunMode::Normal => {}
+            TextRunMode::Shadow => {
+                // Shadows never use subpixel AA, but need to respect the alpha/mono flag
+                // for reftests.
+                font.render_mode = font.render_mode.limit_by(FontRenderMode::Alpha);
+            }
+        };
+        font.size = font.size.scale_by(device_pixel_ratio);
+        font
+    }
+
     fn prepare_for_render(
         &mut self,
         resource_cache: &mut ResourceCache,
         device_pixel_ratio: f32,
         display_list: &BuiltDisplayList,
         run_mode: TextRunMode,
         gpu_cache: &mut GpuCache,
     ) {
-        let mut font = self.font.clone();
-        font.size = font.size.scale_by(device_pixel_ratio);
-        match run_mode {
-            TextRunMode::Shadow => {
-                font.render_mode = self.shadow_render_mode;
-            }
-            TextRunMode::Normal => {}
-        }
-
-        if run_mode == TextRunMode::Shadow {
-            font.render_mode = self.shadow_render_mode;
-        }
+        let font = self.get_font(run_mode, device_pixel_ratio);
 
         // Cache the glyph positions, if not in the cache already.
         // TODO(gw): In the future, remove `glyph_instances`
         //           completely, and just reference the glyphs
         //           directly from the display list.
         if self.glyph_keys.is_empty() {
+            let subpx_dir = font.subpx_dir.limit_by(font.render_mode);
             let src_glyphs = display_list.get(self.glyph_range);
 
             // TODO(gw): If we support chunks() on AuxIter
             //           in the future, this code below could
             //           be much simpler...
             let mut gpu_block = GpuBlockData::empty();
-
             for (i, src) in src_glyphs.enumerate() {
-                let key = GlyphKey::new(src.index, src.point, font.render_mode, font.subpx_dir);
+                let key = GlyphKey::new(src.index, src.point, font.render_mode, subpx_dir);
                 self.glyph_keys.push(key);
 
                 // Two glyphs are packed per GPU block.
 
                 if (i & 1) == 0 {
                     gpu_block.data[0] = src.point.x;
                     gpu_block.data[1] = src.point.y;
                 } else {
@@ -595,21 +593,21 @@ impl TextRunPrimitiveCpu {
                 self.glyph_gpu_blocks.push(gpu_block);
             }
         }
 
         resource_cache.request_glyphs(font, &self.glyph_keys, gpu_cache);
     }
 
     fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
-        request.push(self.color);
+        request.push(ColorF::from(self.font.color));
         request.push([
             self.offset.x,
             self.offset.y,
-            self.font.subpx_dir as u32 as f32,
+            self.font.subpx_dir.limit_by(self.font.render_mode) as u32 as f32,
             0.0,
         ]);
         request.extend_from_slice(&self.glyph_gpu_blocks);
 
         assert!(request.current_used_block_num() <= MAX_VERTEX_TEXTURE_WIDTH);
     }
 }
 
@@ -802,58 +800,58 @@ pub enum PrimitiveContainer {
     TextRun(TextRunPrimitiveCpu),
     Image(ImagePrimitiveCpu),
     YuvImage(YuvImagePrimitiveCpu),
     Border(BorderPrimitiveCpu),
     AlignedGradient(GradientPrimitiveCpu),
     AngleGradient(GradientPrimitiveCpu),
     RadialGradient(RadialGradientPrimitiveCpu),
     BoxShadow(BoxShadowPrimitiveCpu),
-    Shadow(ShadowPrimitiveCpu),
+    Picture(PicturePrimitive),
     Line(LinePrimitive),
 }
 
 pub struct PrimitiveStore {
     /// CPU side information only.
     pub cpu_rectangles: Vec<RectanglePrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
-    pub cpu_shadows: Vec<ShadowPrimitiveCpu>,
+    pub cpu_pictures: Vec<PicturePrimitive>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
     pub cpu_yuv_images: Vec<YuvImagePrimitiveCpu>,
     pub cpu_gradients: Vec<GradientPrimitiveCpu>,
     pub cpu_radial_gradients: Vec<RadialGradientPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
     pub cpu_box_shadows: Vec<BoxShadowPrimitiveCpu>,
     pub cpu_lines: Vec<LinePrimitive>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_rectangles: Vec::new(),
             cpu_text_runs: Vec::new(),
-            cpu_shadows: Vec::new(),
+            cpu_pictures: Vec::new(),
             cpu_images: Vec::new(),
             cpu_yuv_images: Vec::new(),
             cpu_gradients: Vec::new(),
             cpu_radial_gradients: Vec::new(),
             cpu_borders: Vec::new(),
             cpu_box_shadows: Vec::new(),
             cpu_lines: Vec::new(),
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_rectangles: recycle_vec(self.cpu_rectangles),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
-            cpu_shadows: recycle_vec(self.cpu_shadows),
+            cpu_pictures: recycle_vec(self.cpu_pictures),
             cpu_images: recycle_vec(self.cpu_images),
             cpu_yuv_images: recycle_vec(self.cpu_yuv_images),
             cpu_gradients: recycle_vec(self.cpu_gradients),
             cpu_radial_gradients: recycle_vec(self.cpu_radial_gradients),
             cpu_borders: recycle_vec(self.cpu_borders),
             cpu_box_shadows: recycle_vec(self.cpu_box_shadows),
             cpu_lines: recycle_vec(self.cpu_lines),
         }
@@ -915,25 +913,25 @@ impl PrimitiveStore {
                     prim_kind: PrimitiveKind::TextRun,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_runs.len()),
                     ..base_metadata
                 };
 
                 self.cpu_text_runs.push(text_cpu);
                 metadata
             }
-            PrimitiveContainer::Shadow(shadow) => {
+            PrimitiveContainer::Picture(picture) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
-                    prim_kind: PrimitiveKind::Shadow,
-                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_shadows.len()),
+                    prim_kind: PrimitiveKind::Picture,
+                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_pictures.len()),
                     ..base_metadata
                 };
 
-                self.cpu_shadows.push(shadow);
+                self.cpu_pictures.push(picture);
                 metadata
             }
             PrimitiveContainer::Image(image_cpu) => {
                 let metadata = PrimitiveMetadata {
                     opacity: PrimitiveOpacity::translucent(),
                     prim_kind: PrimitiveKind::Image,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_images.len()),
                     ..base_metadata
@@ -1030,19 +1028,19 @@ impl PrimitiveStore {
         // Add any dynamic render tasks needed to render this primitive
         let metadata = &self.cpu_metadata[prim_index.0];
 
         let render_task_id = match metadata.prim_kind {
             PrimitiveKind::BoxShadow => {
                 let box_shadow = &self.cpu_box_shadows[metadata.cpu_prim_index.0];
                 box_shadow.render_task_id
             }
-            PrimitiveKind::Shadow => {
-                let shadow = &self.cpu_shadows[metadata.cpu_prim_index.0];
-                shadow.render_task_id
+            PrimitiveKind::Picture => {
+                let picture = &self.cpu_pictures[metadata.cpu_prim_index.0];
+                picture.render_task_id
             }
             PrimitiveKind::Rectangle |
             PrimitiveKind::TextRun |
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::YuvImage |
             PrimitiveKind::Border |
             PrimitiveKind::AngleGradient |
@@ -1109,36 +1107,37 @@ impl PrimitiveStore {
                     cache_key,
                     cache_size,
                     prim_index
                 );
 
                 // ignore the new task if we are in a dependency context
                 box_shadow.render_task_id = render_tasks.map(|rt| rt.add(render_task));
             }
-            PrimitiveKind::Shadow => {
-                let shadow = &mut self.cpu_shadows[metadata.cpu_prim_index.0];
+            PrimitiveKind::Picture => {
+                let picture = &mut self.cpu_pictures[metadata.cpu_prim_index.0];
 
                 // This is a shadow element. Create a render task that will
                 // render the text run to a target, and then apply a gaussian
                 // blur to that text run in order to build the actual primitive
                 // which will be blitted to the framebuffer.
                 let cache_width =
                     (metadata.local_rect.size.width * prim_context.device_pixel_ratio).ceil() as i32;
                 let cache_height =
                     (metadata.local_rect.size.height * prim_context.device_pixel_ratio).ceil() as i32;
                 let cache_size = DeviceIntSize::new(cache_width, cache_height);
-                let blur_radius = device_length(shadow.shadow.blur_radius, prim_context.device_pixel_ratio);
+                let blur_radius = picture.as_shadow().blur_radius;
+                let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
 
                 // ignore new tasks if we are in a dependency context
-                shadow.render_task_id = render_tasks.map(|rt| {
-                    let prim_cache_task = RenderTask::new_prim_cache(cache_size, prim_index);
-                    let prim_cache_task_id = rt.add(prim_cache_task);
+                picture.render_task_id = render_tasks.map(|rt| {
+                    let picture_task = RenderTask::new_picture(cache_size, prim_index);
+                    let picture_task_id = rt.add(picture_task);
                     let render_task =
-                        RenderTask::new_blur(blur_radius, prim_cache_task_id, rt);
+                        RenderTask::new_blur(blur_radius, picture_task_id, rt);
                     rt.add(render_task)
                 });
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 text.prepare_for_render(
                     resource_cache,
                     prim_context.device_pixel_ratio,
@@ -1229,23 +1228,24 @@ impl PrimitiveStore {
                 PrimitiveKind::RadialGradient => {
                     let gradient = &self.cpu_radial_gradients[metadata.cpu_prim_index.0];
                     gradient.build_gpu_blocks_for_angle_radial(prim_context.display_list, request);
                 }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
-                PrimitiveKind::Shadow => {
-                    let prim = &self.cpu_shadows[metadata.cpu_prim_index.0];
-                    request.push(prim.shadow.color);
+                PrimitiveKind::Picture => {
+                    let picture = &self.cpu_pictures[metadata.cpu_prim_index.0];
+                    let shadow = picture.as_shadow();
+                    request.push(shadow.color);
                     request.push([
-                        prim.shadow.offset.x,
-                        prim.shadow.offset.y,
-                        prim.shadow.blur_radius,
+                        shadow.offset.x,
+                        shadow.offset.y,
+                        shadow.blur_radius,
                         0.0,
                     ]);
                 }
             }
         }
     }
 
     fn update_clip_task(
@@ -1369,37 +1369,41 @@ impl PrimitiveStore {
                 Some(device_rect) => Geometry {
                     local_rect,
                     device_rect,
                 },
                 None => return None,
             };
 
             let dependencies = match metadata.prim_kind {
-                PrimitiveKind::Shadow =>
-                    self.cpu_shadows[metadata.cpu_prim_index.0].primitives.clone(),
+                PrimitiveKind::Picture =>
+                    self.cpu_pictures[metadata.cpu_prim_index.0].prim_runs.clone(),
                 _ => Vec::new(),
             };
             (geometry, dependencies)
         };
 
         // Recurse into any sub primitives and prepare them for rendering first.
         // TODO(gw): This code is a bit hacky to work around the borrow checker.
         //           Specifically, the clone() below on the primitive list for
         //           text shadow primitives. Consider restructuring this code to
         //           avoid borrow checker issues.
-        for sub_prim_index in dependent_primitives {
-            self.prepare_prim_for_render_inner(
-                sub_prim_index,
-                prim_context,
-                resource_cache,
-                gpu_cache,
-                None,
-                TextRunMode::Shadow,
-            );
+        for run in dependent_primitives {
+            for i in 0 .. run.count {
+                let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
+
+                self.prepare_prim_for_render_inner(
+                    sub_prim_index,
+                    prim_context,
+                    resource_cache,
+                    gpu_cache,
+                    None,
+                    TextRunMode::Shadow,
+                );
+            }
         }
 
         if !self.update_clip_task(
             prim_index,
             prim_context,
             geometry.device_rect,
             resource_cache,
             gpu_cache,
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -707,17 +707,15 @@ impl ToDebugString for SpecificDisplayIt
             SpecificDisplayItem::RadialGradient(..) => String::from("radial_gradient"),
             SpecificDisplayItem::BoxShadow(..) => String::from("box_shadow"),
             SpecificDisplayItem::Border(..) => String::from("border"),
             SpecificDisplayItem::PushStackingContext(..) => String::from("push_stacking_context"),
             SpecificDisplayItem::Iframe(..) => String::from("iframe"),
             SpecificDisplayItem::Clip(..) => String::from("clip"),
             SpecificDisplayItem::ScrollFrame(..) => String::from("scroll_frame"),
             SpecificDisplayItem::StickyFrame(..) => String::from("sticky_frame"),
-            SpecificDisplayItem::PushNestedDisplayList => String::from("push_nested_display_list"),
-            SpecificDisplayItem::PopNestedDisplayList => String::from("pop_nested_display_list"),
             SpecificDisplayItem::SetGradientStops => String::from("set_gradient_stops"),
             SpecificDisplayItem::PopStackingContext => String::from("pop_stacking_context"),
             SpecificDisplayItem::PushShadow(..) => String::from("push_shadow"),
             SpecificDisplayItem::PopShadow => String::from("pop_shadow"),
         }
     }
 }
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -221,17 +221,17 @@ pub struct CacheMaskTask {
 #[derive(Debug)]
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
 #[derive(Debug)]
 pub enum RenderTaskKind {
     Alpha(AlphaRenderTask),
-    CachePrimitive(PrimitiveIndex),
+    Picture(PrimitiveIndex),
     BoxShadow(PrimitiveIndex),
     CacheMask(CacheMaskTask),
     VerticalBlur(DeviceIntLength),
     HorizontalBlur(DeviceIntLength),
     Readback(DeviceIntRect),
     Alias(RenderTaskId),
 }
 
@@ -264,22 +264,22 @@ impl RenderTask {
     pub fn new_dynamic_alpha_batch(
         rect: &DeviceIntRect,
         frame_output_pipeline_id: Option<PipelineId>,
     ) -> RenderTask {
         let location = RenderTaskLocation::Dynamic(None, rect.size);
         Self::new_alpha_batch(rect.origin, location, frame_output_pipeline_id)
     }
 
-    pub fn new_prim_cache(size: DeviceIntSize, prim_index: PrimitiveIndex) -> RenderTask {
+    pub fn new_picture(size: DeviceIntSize, prim_index: PrimitiveIndex) -> RenderTask {
         RenderTask {
             cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, size),
-            kind: RenderTaskKind::CachePrimitive(prim_index),
+            kind: RenderTaskKind::Picture(prim_index),
         }
     }
 
     pub fn new_box_shadow(
         key: BoxShadowPrimitiveCacheKey,
         size: DeviceIntSize,
         prim_index: PrimitiveIndex,
     ) -> RenderTask {
@@ -324,17 +324,20 @@ impl RenderTask {
                 // created (by a different clip in the list), the allocated
                 // rectangle for the mask could end up being much bigger
                 // than is actually required.
                 if !clip_info.is_masking() {
                     return false;
                 }
 
                 match clip_info.bounds.inner {
-                    Some(ref inner) if !inner.device_rect.is_empty() => {
+                    // Inner rects aren't valid if the item is not axis-aligned, which can
+                    // be determined by the apply_rectangles field. This is mostly a band-aid
+                    // until we have better handling of inner rectangles for transformed clips.
+                    Some(ref inner) if !work_item.apply_rectangles && !inner.device_rect.is_empty() => {
                         inner_rect = inner_rect.and_then(|r| r.intersection(&inner.device_rect));
                         !inner.device_rect.contains_rect(&task_rect)
                     }
                     _ => {
                         inner_rect = None;
                         true
                     }
                 }
@@ -418,30 +421,30 @@ impl RenderTask {
         };
 
         blur_task_h
     }
 
     pub fn as_alpha_batch_mut<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref mut task) => task,
-            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::Picture(..) |
             RenderTaskKind::BoxShadow(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::Alias(..) => unreachable!(),
         }
     }
 
     pub fn as_alpha_batch<'a>(&'a self) -> &'a AlphaRenderTask {
         match self.kind {
             RenderTaskKind::Alpha(ref task) => task,
-            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::Picture(..) |
             RenderTaskKind::BoxShadow(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::Alias(..) => unreachable!(),
         }
     }
@@ -473,17 +476,17 @@ impl RenderTask {
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                     ],
                 }
             }
-            RenderTaskKind::CachePrimitive(..) | RenderTaskKind::BoxShadow(..) => {
+            RenderTaskKind::Picture(..) | RenderTaskKind::BoxShadow(..) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
@@ -575,17 +578,17 @@ impl RenderTask {
                 (DeviceIntRect::new(origin, size), target_index)
             }
         }
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             RenderTaskKind::Alpha(..) |
-            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::Picture(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) => RenderTargetKind::Color,
 
             RenderTaskKind::CacheMask(..) | RenderTaskKind::BoxShadow(..) => {
                 RenderTargetKind::Alpha
             }
 
@@ -599,17 +602,17 @@ impl RenderTask {
     // to all passes (except the first) in the render task tree.
     // To qualify for this, the task needs to have no children / dependencies.
     // Currently, this is only supported for A8 targets, but it can be
     // trivially extended to also support RGBA8 targets in the future
     // if we decide that is useful.
     pub fn is_shared(&self) -> bool {
         match self.kind {
             RenderTaskKind::Alpha(..) |
-            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::Picture(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::Readback(..) |
             RenderTaskKind::HorizontalBlur(..) => false,
 
             RenderTaskKind::CacheMask(..) | RenderTaskKind::BoxShadow(..) => true,
 
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: is_shared() called on aliased task");
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -5,17 +5,17 @@
 //! The webrender API.
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{channel, BlobImageRenderer, FontRenderMode};
-use api::{ColorF, Epoch, PipelineId, RenderApiSender, RenderNotifier};
+use api::{ColorF, ColorU, Epoch, PipelineId, RenderApiSender, RenderNotifier};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageId, ExternalImageType, ImageFormat};
 use api::{YUV_COLOR_SPACES, YUV_FORMATS};
 use api::{YuvColorSpace, YuvFormat};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
@@ -622,17 +622,17 @@ impl SourceTextureResolver {
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BlendMode {
     None,
     Alpha,
     PremultipliedAlpha,
 
     // Use the color of the text itself as a constant color blend factor.
-    Subpixel(ColorF),
+    Subpixel(ColorU),
 }
 
 // Tracks the state of each row in the GPU cache texture.
 struct CacheRow {
     is_dirty: bool,
 }
 
 impl CacheRow {
@@ -2742,17 +2742,17 @@ impl Renderer {
                             self.device.set_blend_mode_alpha();
                         }
                         BlendMode::PremultipliedAlpha => {
                             self.device.set_blend(true);
                             self.device.set_blend_mode_premultiplied_alpha();
                         }
                         BlendMode::Subpixel(color) => {
                             self.device.set_blend(true);
-                            self.device.set_blend_mode_subpixel(color);
+                            self.device.set_blend_mode_subpixel(color.into());
                         }
                     }
                     prev_blend_mode = batch.key.blend_mode;
                 }
 
                 if self.debug_flags.contains(ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => ColorF::new(0.3, 0.3, 0.3, 1.0),
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AddFont, BlobImageData, BlobImageResources, ResourceUpdate, ResourceUpdates};
 use api::{BlobImageDescriptor, BlobImageError, BlobImageRenderer, BlobImageRequest};
-use api::{ColorF, FontRenderMode, SubpixelDirection};
+use api::{ColorF, FontRenderMode};
 use api::{DevicePoint, DeviceUintRect, DeviceUintSize};
 use api::{Epoch, FontInstance, FontInstanceKey, FontKey, FontTemplate};
 use api::{ExternalImageData, ExternalImageType};
 use api::{FontInstanceOptions, FontInstancePlatformOptions, FontVariation};
 use api::{GlyphDimensions, GlyphKey, IdNamespace};
 use api::{ImageData, ImageDescriptor, ImageKey, ImageRendering};
 use api::{TileOffset, TileSize};
 use app_units::Au;
@@ -332,39 +332,36 @@ impl ResourceCache {
         &mut self,
         instance_key: FontInstanceKey,
         font_key: FontKey,
         glyph_size: Au,
         options: Option<FontInstanceOptions>,
         platform_options: Option<FontInstancePlatformOptions>,
         variations: Vec<FontVariation>,
     ) {
-        let mut requested_render_mode = FontRenderMode::Subpixel;
-        let mut subpx_dir = SubpixelDirection::Horizontal;
-        if let Some(options) = options {
-            if let Some(render_mode) = options.render_mode {
-                requested_render_mode = render_mode;
-            }
-        }
-        if self.glyph_rasterizer.is_bitmap_font(font_key) {
-            requested_render_mode = requested_render_mode.limit_by(FontRenderMode::Bitmap);
-        }
-        if requested_render_mode == FontRenderMode::Mono {
-            subpx_dir = SubpixelDirection::None;
-        }
-        let instance = FontInstance::new(
+        let FontInstanceOptions {
+            render_mode,
+            subpx_dir,
+            synthetic_italics,
+            ..
+        } = options.unwrap_or_default();
+        assert!(render_mode != FontRenderMode::Bitmap);
+        let mut instance = FontInstance::new(
             font_key,
             glyph_size,
             ColorF::new(0.0, 0.0, 0.0, 1.0),
-            requested_render_mode,
+            render_mode,
             subpx_dir,
             platform_options,
             variations,
-            options.map_or(false, |opts| opts.synthetic_italics),
+            synthetic_italics,
         );
+        if self.glyph_rasterizer.is_bitmap_font(&instance) {
+            instance.render_mode = instance.render_mode.limit_by(FontRenderMode::Bitmap);
+        }
         self.resources.font_instances.insert(instance_key, instance);
     }
 
     pub fn delete_font_instance(&mut self, instance_key: FontInstanceKey) {
         self.resources.font_instances.remove(&instance_key);
         if let Some(ref mut r) = self.blob_image_renderer {
             r.delete_font_instance(instance_key);
         }
@@ -559,57 +556,57 @@ impl ResourceCache {
                     key
                 );
             }
         }
     }
 
     pub fn request_glyphs(
         &mut self,
-        font: FontInstance,
+        mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         gpu_cache: &mut GpuCache,
     ) {
         debug_assert_eq!(self.state, State::AddResources);
 
+        self.glyph_rasterizer.prepare_font(&mut font);
         self.glyph_rasterizer.request_glyphs(
             &mut self.cached_glyphs,
             font,
             glyph_keys,
             &mut self.texture_cache,
             gpu_cache,
         );
     }
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         self.texture_cache.pending_updates()
     }
 
     pub fn fetch_glyphs<F>(
         &self,
-        font: FontInstance,
+        mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         fetch_buffer: &mut Vec<GlyphFetchResult>,
         gpu_cache: &GpuCache,
         mut f: F,
     ) where
         F: FnMut(SourceTexture, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
+
+        self.glyph_rasterizer.prepare_font(&mut font);
         let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
 
         let mut current_texture_id = SourceTexture::Invalid;
         debug_assert!(fetch_buffer.is_empty());
 
         for (loop_index, key) in glyph_keys.iter().enumerate() {
-            let glyph = glyph_key_cache.get(key);
-            let cache_item = glyph
-                .as_ref()
-                .map(|info| self.texture_cache.get(&info.texture_cache_handle));
-            if let Some(cache_item) = cache_item {
+            if let Some(ref glyph) = *glyph_key_cache.get(key) {
+                let cache_item = self.texture_cache.get(&glyph.texture_cache_handle);
                 if current_texture_id != cache_item.texture_id {
                     if !fetch_buffer.is_empty() {
                         f(current_texture_id, fetch_buffer);
                         fetch_buffer.clear();
                     }
                     current_texture_id = cache_item.texture_id;
                 }
                 fetch_buffer.push(GlyphFetchResult {
@@ -820,17 +817,17 @@ impl ResourceCache {
             };
 
             let entry = self.cached_images.get_mut(&request).unwrap();
             self.texture_cache.update(
                 &mut entry.texture_cache_handle,
                 descriptor,
                 filter,
                 image_data,
-                [0.0; 2],
+                [0.0; 3],
                 image_template.dirty_rect,
                 gpu_cache,
             );
             image_template.dirty_rect = None;
         }
     }
 
     pub fn end_frame(&mut self) {
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -79,34 +79,34 @@ enum EntryKind {
 // cache or a standalone texture.
 #[derive(Debug)]
 struct CacheEntry {
     // Size the requested item, in device pixels.
     size: DeviceUintSize,
     // Details specific to standalone or shared items.
     kind: EntryKind,
     // Arbitrary user data associated with this item.
-    user_data: [f32; 2],
+    user_data: [f32; 3],
     // The last frame this item was requested for rendering.
     last_access: FrameId,
     // Handle to the resource rect in the GPU cache.
     uv_rect_handle: GpuCacheHandle,
     // Image format of the item.
     format: ImageFormat,
     // The actual device texture ID this is part of.
     texture_id: CacheTextureId,
 }
 
 impl CacheEntry {
     // Create a new entry for a standalone texture.
     fn new_standalone(
         texture_id: CacheTextureId,
         size: DeviceUintSize,
         format: ImageFormat,
-        user_data: [f32; 2],
+        user_data: [f32; 3],
         last_access: FrameId,
     ) -> CacheEntry {
         CacheEntry {
             size,
             user_data,
             last_access,
             kind: EntryKind::Standalone,
             texture_id,
@@ -130,17 +130,17 @@ impl CacheEntry {
                 } => (origin, layer_index as f32),
             };
             request.push([
                 origin.x as f32,
                 origin.y as f32,
                 (origin.x + self.size.width) as f32,
                 (origin.y + self.size.height) as f32,
             ]);
-            request.push([layer_index, self.user_data[0], self.user_data[1], 0.0]);
+            request.push([layer_index, self.user_data[0], self.user_data[1], self.user_data[2]]);
         }
     }
 }
 
 type WeakCacheEntryHandle = WeakFreeListHandle<CacheEntry>;
 
 // A texture cache handle is a weak reference to a cache entry.
 // If the handle has not been inserted into the cache yet, the
@@ -270,17 +270,17 @@ impl TextureCache {
 
     // Update the data stored by a given texture cache handle.
     pub fn update(
         &mut self,
         handle: &mut TextureCacheHandle,
         descriptor: ImageDescriptor,
         filter: TextureFilter,
         data: ImageData,
-        user_data: [f32; 2],
+        user_data: [f32; 3],
         mut dirty_rect: Option<DeviceUintRect>,
         gpu_cache: &mut GpuCache,
     ) {
         // Determine if we need to allocate texture cache memory
         // for this item. We need to reallocate if any of the following
         // is true:
         // - Never been in the cache
         // - Has been in the cache but was evicted.
@@ -507,17 +507,17 @@ impl TextureCache {
             }
         }
     }
 
     // Attempt to allocate a block from the shared cache.
     fn allocate_from_shared_cache(
         &mut self,
         descriptor: &ImageDescriptor,
-        user_data: [f32; 2],
+        user_data: [f32; 3],
     ) -> Option<CacheEntry> {
         // Work out which cache it goes in, based on format.
         let texture_array = match descriptor.format {
             ImageFormat::A8 => &mut self.array_a8,
             ImageFormat::BGRA8 => &mut self.array_rgba8,
             ImageFormat::RGB8 => &mut self.array_rgb8,
             ImageFormat::RG8 => &mut self.array_rg8,
             ImageFormat::Invalid | ImageFormat::RGBAF32 => unreachable!(),
@@ -556,17 +556,17 @@ impl TextureCache {
     // Allocate storage for a given image. This attempts to allocate
     // from the shared cache, but falls back to standalone texture
     // if the image is too large, or the cache is full.
     fn allocate(
         &mut self,
         handle: &mut TextureCacheHandle,
         descriptor: ImageDescriptor,
         filter: TextureFilter,
-        user_data: [f32; 2],
+        user_data: [f32; 3],
     ) {
         assert!(descriptor.width > 0 && descriptor.height > 0);
 
         // Work out if this image qualifies to go in the shared (batching) cache.
         let mut allowed_in_shared_cache = true;
         let mut allocated_in_shared_cache = true;
         let mut new_cache_entry = None;
         let size = DeviceUintSize::new(descriptor.width, descriptor.height);
@@ -850,17 +850,17 @@ impl TextureArray {
         }
     }
 
     // Allocate space in this texture array.
     fn alloc(
         &mut self,
         width: u32,
         height: u32,
-        user_data: [f32; 2],
+        user_data: [f32; 3],
         frame_id: FrameId,
     ) -> Option<CacheEntry> {
         // Lazily allocate the regions if not already created.
         // This means that very rarely used image formats can be
         // added but won't allocate a cache if never used.
         if !self.is_allocated {
             debug_assert!(TEXTURE_LAYER_DIMENSIONS % TEXTURE_REGION_DIMENSIONS == 0);
             let regions_per_axis = TEXTURE_LAYER_DIMENSIONS / TEXTURE_REGION_DIMENSIONS;
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -11,27 +11,28 @@ use border::{BorderCornerInstance, Borde
 use clip::{ClipSource, ClipStore};
 use device::Texture;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BoxShadowCacheInstance, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
-use prim_store::DeferredResolve;
+use prim_store::{DeferredResolve, TextRunMode};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
 use texture_allocator::GuillotineAllocator;
-use util::{TransformedRect, TransformedRectKind};
+use util::{MatrixHelpers, TransformedRect, TransformedRectKind};
+use euclid::rect;
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
 const MIN_TARGET_SIZE: u32 = 2048;
 
 trait AlphaBatchHelpers {
     fn get_blend_mode(
@@ -49,27 +50,27 @@ impl AlphaBatchHelpers for PrimitiveStor
     ) -> BlendMode {
         let needs_blending = !metadata.opacity.is_opaque || metadata.clip_task_id.is_some() ||
             transform_kind == TransformedRectKind::Complex;
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                 match text_run_cpu.font.render_mode {
-                    FontRenderMode::Subpixel => BlendMode::Subpixel(text_run_cpu.color),
+                    FontRenderMode::Subpixel => BlendMode::Subpixel(text_run_cpu.font.color),
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
-                    FontRenderMode::Bitmap => BlendMode::Alpha,
+                    FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
             }
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient |
-            PrimitiveKind::Shadow => if needs_blending {
+            PrimitiveKind::Picture => if needs_blending {
                 BlendMode::PremultipliedAlpha
             } else {
                 BlendMode::None
             },
             _ => if needs_blending {
                 BlendMode::Alpha
             } else {
                 BlendMode::None
@@ -502,18 +503,17 @@ impl AlphaRenderItem {
                         );
                         let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
                         batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
                     }
                     PrimitiveKind::TextRun => {
                         let text_cpu =
                             &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
 
-                        let mut font = text_cpu.font.clone();
-                        font.size = font.size.scale_by(ctx.device_pixel_ratio);
+                        let font = text_cpu.get_font(TextRunMode::Normal, ctx.device_pixel_ratio);
 
                         ctx.resource_cache.fetch_glyphs(
                             font,
                             &text_cpu.glyph_keys,
                             glyph_fetch_buffer,
                             gpu_cache,
                             |texture_id, glyphs| {
                                 let textures = BatchTextures {
@@ -523,33 +523,34 @@ impl AlphaRenderItem {
                                         SourceTexture::Invalid,
                                     ],
                                 };
 
                                 let kind = BatchKind::Transformable(
                                     transform_kind,
                                     TransformBatchKind::TextRun,
                                 );
+
                                 let key = BatchKey::new(kind, blend_mode, textures);
                                 let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
 
                                 for glyph in glyphs {
                                     batch.push(base_instance.build(
                                         glyph.index_in_text_run,
                                         glyph.uv_rect_address.as_int(),
                                         0,
                                     ));
                                 }
                             },
                         );
                     }
-                    PrimitiveKind::Shadow => {
-                        let shadow =
-                            &ctx.prim_store.cpu_shadows[prim_metadata.cpu_prim_index.0];
-                        let cache_task_id = shadow.render_task_id.expect("no render task!");
+                    PrimitiveKind::Picture => {
+                        let picture =
+                            &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
+                        let cache_task_id = picture.render_task_id.expect("no render task!");
                         let cache_task_address = render_tasks.get_task_address(cache_task_id);
                         let textures = BatchTextures::render_target_cache();
                         let kind = BatchKind::Transformable(
                             transform_kind,
                             TransformBatchKind::CacheImage,
                         );
                         let key = BatchKey::new(kind, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
@@ -1130,77 +1131,79 @@ impl RenderTarget for ColorRenderTarget 
                 // Find the child render task that we are applying
                 // a horizontal blur on.
                 self.horizontal_blurs.push(BlurInstance {
                     task_address: render_tasks.get_task_address(task_id),
                     src_task_address: render_tasks.get_task_address(task.children[0]),
                     blur_direction: BlurDirection::Horizontal,
                 });
             }
-            RenderTaskKind::CachePrimitive(prim_index) => {
+            RenderTaskKind::Picture(prim_index) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                 let prim_address = prim_metadata.gpu_location.as_int(gpu_cache);
 
                 match prim_metadata.prim_kind {
-                    PrimitiveKind::Shadow => {
-                        let prim = &ctx.prim_store.cpu_shadows[prim_metadata.cpu_prim_index.0];
+                    PrimitiveKind::Picture => {
+                        let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
                         let task_index = render_tasks.get_task_address(task_id);
 
-                        for sub_prim_index in &prim.primitives {
-                            let sub_metadata = ctx.prim_store.get_metadata(*sub_prim_index);
-                            let sub_prim_address =
-                                gpu_cache.get_address(&sub_metadata.gpu_location);
-                            let instance = SimplePrimitiveInstance::new(
-                                sub_prim_address,
-                                task_index,
-                                RenderTaskAddress(0),
-                                PackedLayerIndex(0).into(),
-                                0,
-                            ); // z is disabled for rendering cache primitives
+                        for run in &prim.prim_runs {
+                            for i in 0 .. run.count {
+                                let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
 
-                            match sub_metadata.prim_kind {
-                                PrimitiveKind::TextRun => {
-                                    // Add instances that reference the text run GPU location. Also supply
-                                    // the parent shadow prim address as a user data field, allowing
-                                    // the shader to fetch the shadow parameters.
-                                    let text = &ctx.prim_store.cpu_text_runs
-                                        [sub_metadata.cpu_prim_index.0];
-                                    let text_run_cache_prims = &mut self.text_run_cache_prims;
+                                let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
+                                let sub_prim_address =
+                                    gpu_cache.get_address(&sub_metadata.gpu_location);
+                                let instance = SimplePrimitiveInstance::new(
+                                    sub_prim_address,
+                                    task_index,
+                                    RenderTaskAddress(0),
+                                    PackedLayerIndex(0).into(),
+                                    0,
+                                ); // z is disabled for rendering cache primitives
 
-                                    let mut font = text.font.clone();
-                                    font.size = font.size.scale_by(ctx.device_pixel_ratio);
-                                    font.render_mode = text.shadow_render_mode;
+                                match sub_metadata.prim_kind {
+                                    PrimitiveKind::TextRun => {
+                                        // Add instances that reference the text run GPU location. Also supply
+                                        // the parent shadow prim address as a user data field, allowing
+                                        // the shader to fetch the shadow parameters.
+                                        let text = &ctx.prim_store.cpu_text_runs
+                                            [sub_metadata.cpu_prim_index.0];
+                                        let text_run_cache_prims = &mut self.text_run_cache_prims;
+
+                                        let font = text.get_font(TextRunMode::Shadow, ctx.device_pixel_ratio);
 
-                                    ctx.resource_cache.fetch_glyphs(
-                                        font,
-                                        &text.glyph_keys,
-                                        &mut self.glyph_fetch_buffer,
-                                        gpu_cache,
-                                        |texture_id, glyphs| {
-                                            let batch = text_run_cache_prims
-                                                .entry(texture_id)
-                                                .or_insert(Vec::new());
+                                        ctx.resource_cache.fetch_glyphs(
+                                            font,
+                                            &text.glyph_keys,
+                                            &mut self.glyph_fetch_buffer,
+                                            gpu_cache,
+                                            |texture_id, glyphs| {
+                                                let batch = text_run_cache_prims
+                                                    .entry(texture_id)
+                                                    .or_insert(Vec::new());
 
-                                            for glyph in glyphs {
-                                                batch.push(instance.build(
-                                                    glyph.index_in_text_run,
-                                                    glyph.uv_rect_address.as_int(),
-                                                    prim_address,
-                                                ));
-                                            }
-                                        },
-                                    );
-                                }
-                                PrimitiveKind::Line => {
-                                    self.line_cache_prims
-                                        .push(instance.build(prim_address, 0, 0));
-                                }
-                                _ => {
-                                    unreachable!("Unexpected sub primitive type");
+                                                for glyph in glyphs {
+                                                    batch.push(instance.build(
+                                                        glyph.index_in_text_run,
+                                                        glyph.uv_rect_address.as_int(),
+                                                        prim_address,
+                                                    ));
+                                                }
+                                            },
+                                        );
+                                    }
+                                    PrimitiveKind::Line => {
+                                        self.line_cache_prims
+                                            .push(instance.build(prim_address, 0, 0));
+                                    }
+                                    _ => {
+                                        unreachable!("Unexpected sub primitive type");
+                                    }
                                 }
                             }
                         }
                     }
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
                     }
@@ -1250,17 +1253,17 @@ impl RenderTarget for AlphaRenderTarget 
         let task = render_tasks.get(task_id);
         match task.kind {
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: add_task() called on invalidated task");
             }
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::Picture(..) |
             RenderTaskKind::Readback(..) => {
                 panic!("Should not be added to alpha target!");
             }
             RenderTaskKind::BoxShadow(prim_index) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::BoxShadow => {
@@ -1681,17 +1684,23 @@ impl PackedLayer {
     }
 
     pub fn set_rect(
         &mut self,
         local_rect: &LayerRect,
         screen_rect: &DeviceIntRect,
         device_pixel_ratio: f32,
     ) -> Option<(TransformedRectKind, DeviceIntRect)> {
-        self.local_clip_rect = *local_rect;
+        self.local_clip_rect = if self.transform.has_perspective_component() {
+            // Use a very large rect that contains any other rect,
+            // so that nothing is clipped.
+            rect(f32::MIN / 2.0, f32::MIN / 2.0, f32::MAX, f32::MAX)
+        } else {
+            *local_rect
+        };
         let xf_rect = TransformedRect::new(local_rect, &self.transform, device_pixel_ratio);
         xf_rect
             .bounding_rect
             .intersection(screen_rect)
             .map(|rect| (xf_rect.kind, rect))
     }
 }
 
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -12,16 +12,17 @@ use std::f32::consts::FRAC_1_SQRT_2;
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst>;
     fn is_identity(&self) -> bool;
     fn preserves_2d_axis_alignment(&self) -> bool;
+    fn has_perspective_component(&self) -> bool;
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>>;
     fn inverse_rect_footprint(&self, rect: &TypedRect<f32, Dst>) -> TypedRect<f32, Src>;
 }
 
 impl<Src, Dst> MatrixHelpers<Src, Dst> for TypedTransform3D<f32, Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst> {
         let top_left = self.transform_point2d(&rect.origin);
         let top_right = self.transform_point2d(&rect.top_right());
@@ -61,16 +62,20 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> f
         if self.m22.abs() > NEARLY_ZERO {
             col1 += 1;
             row1 += 1;
         }
 
         col0 < 2 && col1 < 2 && row0 < 2 && row1 < 2
     }
 
+    fn has_perspective_component(&self) -> bool { // any of m14/m24/m34/m44 off (0, 0, 0, 1)
+         self.m14 != 0.0 || self.m24 != 0.0 || self.m34 != 0.0 || self.m44 != 1.0
+    }
+
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>> {
         let m: TypedTransform2D<f32, Src, Dst>;
         m = TypedTransform2D::column_major(
             self.m11 - target.x * self.m14,
             self.m21 - target.x * self.m24,
             self.m41 - target.x * self.m44,
             self.m12 - target.y * self.m14,
             self.m22 - target.y * self.m24,
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -1,26 +1,25 @@
 [package]
 name = "webrender_api"
-version = "0.52.0"
+version = "0.52.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 
 [dependencies]
 app_units = "0.5.6"
 bincode = "0.8"
 bitflags = "0.9"
 byteorder = "1.0"
 euclid = "0.15"
-fxhash = "0.2.1"
 heapsize = ">= 0.3.6, < 0.5"
 ipc-channel = {version = "0.8", optional = true}
 serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
 core-graphics = "0.9"
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -97,18 +97,16 @@ pub enum SpecificDisplayItem {
     Border(BorderDisplayItem),
     BoxShadow(BoxShadowDisplayItem),
     Gradient(GradientDisplayItem),
     RadialGradient(RadialGradientDisplayItem),
     Iframe(IframeDisplayItem),
     PushStackingContext(PushStackingContextDisplayItem),
     PopStackingContext,
     SetGradientStops,
-    PushNestedDisplayList,
-    PopNestedDisplayList,
     PushShadow(Shadow),
     PopShadow,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ClipDisplayItem {
     pub id: ClipId,
     pub parent_id: ClipId,
@@ -624,28 +622,26 @@ impl BorderRadius {
 
 impl ComplexClipRegion {
     /// Create a new complex clip region.
     pub fn new(rect: LayoutRect, radii: BorderRadius) -> ComplexClipRegion {
         ComplexClipRegion { rect, radii }
     }
 }
 
-pub type NestingIndex = u64;
-
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ClipId {
-    Clip(u64, NestingIndex, PipelineId),
+    Clip(u64, PipelineId),
     ClipExternalId(u64, PipelineId),
     DynamicallyAddedNode(u64, PipelineId),
 }
 
 impl ClipId {
     pub fn root_scroll_node(pipeline_id: PipelineId) -> ClipId {
-        ClipId::Clip(0, 0, pipeline_id)
+        ClipId::Clip(0, pipeline_id)
     }
 
     pub fn root_reference_frame(pipeline_id: PipelineId) -> ClipId {
         ClipId::DynamicallyAddedNode(0, pipeline_id)
     }
 
     pub fn new(id: u64, pipeline_id: PipelineId) -> ClipId {
         // We do this because it is very easy to create accidentally create something that
@@ -654,39 +650,32 @@ impl ClipId {
             return ClipId::root_scroll_node(pipeline_id);
         }
 
         ClipId::ClipExternalId(id, pipeline_id)
     }
 
     pub fn pipeline_id(&self) -> PipelineId {
         match *self {
-            ClipId::Clip(_, _, pipeline_id) |
+            ClipId::Clip(_, pipeline_id) |
             ClipId::ClipExternalId(_, pipeline_id) |
             ClipId::DynamicallyAddedNode(_, pipeline_id) => pipeline_id,
         }
     }
 
     pub fn external_id(&self) -> Option<u64> {
         match *self {
             ClipId::ClipExternalId(id, _) => Some(id),
             _ => None,
         }
     }
 
     pub fn is_root_scroll_node(&self) -> bool {
         match *self {
-            ClipId::Clip(0, 0, _) => true,
-            _ => false,
-        }
-    }
-
-    pub fn is_nested(&self) -> bool {
-        match *self {
-            ClipId::Clip(_, nesting_level, _) => nesting_level != 0,
+            ClipId::Clip(0, _) => true,
             _ => false,
         }
     }
 }
 
 macro_rules! define_empty_heap_size_of {
     ($name:ident) => {
         impl ::heapsize::HeapSizeOf for $name {
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -1,27 +1,29 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use {BorderDetails, BorderDisplayItem, BorderWidths, BoxShadowClipMode, BoxShadowDisplayItem};
 use {ClipAndScrollInfo, ClipDisplayItem, ClipId, ColorF, ComplexClipRegion, DisplayItem};
-use {ExtendMode, FastHashMap, FastHashSet, FilterOp, FontInstanceKey, GlyphIndex, GlyphInstance};
+use {ExtendMode, FilterOp, FontInstanceKey, GlyphInstance};
 use {GlyphOptions, Gradient, GradientDisplayItem, GradientStop, IframeDisplayItem};
 use {ImageDisplayItem, ImageKey, ImageMask, ImageRendering, LayerPrimitiveInfo, LayoutPoint};
 use {LayoutPrimitiveInfo, LayoutRect, LayoutSize, LayoutTransform, LayoutVector2D};
 use {LineDisplayItem, LineOrientation, LineStyle, LocalClip, MixBlendMode, PipelineId};
 use {PropertyBinding, PushStackingContextDisplayItem, RadialGradient, RadialGradientDisplayItem};
 use {RectangleDisplayItem, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity};
 use {SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, StickyFrameInfo};
 use {TextDisplayItem, Shadow, TransformStyle, YuvColorSpace, YuvData};
 use YuvImageDisplayItem;
 use bincode;
 use serde::{Deserialize, Serialize, Serializer};
 use serde::ser::{SerializeMap, SerializeSeq};
+use std::io::Write;
+use std::{io, ptr};
 use std::marker::PhantomData;
 use time::precise_time_ns;
 
 // We don't want to push a long text-run. If a text-run is too long, split it into several parts.
 // Please check the renderer::MAX_VERTEX_TEXTURE_WIDTH for the detail.
 pub const MAX_TEXT_RUN_LENGTH: usize = 2040;
 
 #[repr(C)]
@@ -65,18 +67,16 @@ pub struct BuiltDisplayList {
 #[derive(Copy, Clone, Default, Deserialize, Serialize)]
 pub struct BuiltDisplayListDescriptor {
     /// The first IPC time stamp: before any work has been done
     builder_start_time: u64,
     /// The second IPC time stamp: after serialization
     builder_finish_time: u64,
     /// The third IPC time stamp: just before sending
     send_start_time: u64,
-    /// The offset where DisplayItems stop and the Glyph list starts
-    glyph_offset: usize,
 }
 
 pub struct BuiltDisplayListIter<'a> {
     list: &'a BuiltDisplayList,
     data: &'a [u8],
     cur_item: DisplayItem,
     cur_stops: ItemRange<GradientStop>,
     cur_glyphs: ItemRange<GlyphInstance>,
@@ -84,22 +84,16 @@ pub struct BuiltDisplayListIter<'a> {
     cur_complex_clip: (ItemRange<ComplexClipRegion>, usize),
     peeking: Peek,
 }
 
 pub struct DisplayItemRef<'a: 'b, 'b> {
     iter: &'b BuiltDisplayListIter<'a>,
 }
 
-pub struct GlyphsIter<'a> {
-    list: &'a BuiltDisplayList,
-    data: &'a [u8],
-}
-
-
 #[derive(PartialEq)]
 enum Peek {
     StartPeeking,
     IsPeeking,
     NotPeeking,
 }
 
 #[derive(Clone)]
@@ -120,22 +114,19 @@ impl BuiltDisplayList {
         self.descriptor.send_start_time = precise_time_ns();
         (self.data, self.descriptor)
     }
 
     pub fn data(&self) -> &[u8] {
         &self.data[..]
     }
 
+    // Currently redundant with data, but may be useful if we add extra data to dl
     pub fn item_slice(&self) -> &[u8] {
-        &self.data[.. self.descriptor.glyph_offset]
-    }
-
-    pub fn glyph_slice(&self) -> &[u8] {
-        &self.data[self.descriptor.glyph_offset ..]
+        &self.data[..]
     }
 
     pub fn descriptor(&self) -> &BuiltDisplayListDescriptor {
         &self.descriptor
     }
 
     pub fn times(&self) -> (u64, u64, u64) {
         (
@@ -144,23 +135,16 @@ impl BuiltDisplayList {
             self.descriptor.send_start_time,
         )
     }
 
     pub fn iter(&self) -> BuiltDisplayListIter {
         BuiltDisplayListIter::new(self)
     }
 
-    pub fn glyphs(&self) -> GlyphsIter {
-        GlyphsIter {
-            list: self,
-            data: self.glyph_slice(),
-        }
-    }
-
     pub fn get<'de, T: Deserialize<'de>>(&self, range: ItemRange<T>) -> AuxIter<T> {
         AuxIter::new(&self.data[range.start .. range.start + range.length])
     }
 }
 
 /// Returns the byte-range the slice occupied, and the number of elements
 /// in the slice.
 fn skip_slice<T: for<'de> Deserialize<'de>>(
@@ -307,31 +291,16 @@ impl<'a> BuiltDisplayListIter<'a> {
             self.peeking = Peek::StartPeeking;
             self.next()
         } else {
             Some(self.as_ref())
         }
     }
 }
 
-impl<'a> Iterator for GlyphsIter<'a> {
-    type Item = (FontInstanceKey, ColorF, ItemRange<GlyphIndex>);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.data.len() == 0 {
-            return None;
-        }
-
-        let (font_key, color) = bincode::deserialize_from(&mut self.data, bincode::Infinite)
-            .expect("MEH: malicious process?");
-        let glyph_indices = skip_slice::<GlyphIndex>(self.list, &mut self.data).0;
-        Some((font_key, color, glyph_indices))
-    }
-}
-
 // Some of these might just become ItemRanges
 impl<'a, 'b> DisplayItemRef<'a, 'b> {
     pub fn display_item(&self) -> &DisplayItem {
         &self.iter.cur_item
     }
 
     pub fn rect(&self) -> LayoutRect {
         self.iter.cur_item.info.rect
@@ -478,29 +447,108 @@ impl<'a, 'b> Serialize for DisplayItemRe
                 &self.iter.list.get(gradient_stops).collect::<Vec<_>>(),
             )?;
         }
 
         map.end()
     }
 }
 
+// This is a replacement for bincode::serialize_into(&vec).
+// The default implementation of Write for Vec will basically
+// call extend_from_slice(). Serde ends up calling that for every
+// field of a struct that we're serializing. extend_from_slice()
+// does not get inlined and thus we end up calling a generic memcpy()
+// implementation. If we instead reserve enough room for the serialized
+// struct in the Vec ahead of time, we can rely on that and use
+// the following UnsafeVecWriter to write into the vec without
+// any checks. This writer assumes that the size returned by the
+// serialize function will not change between calls to serialize_into:
+//
+// For example, the following struct will cause memory unsafety when
+// used with UnsafeVecWriter.
+//
+// struct S {
+//    first: Cell<bool>,
+// }
+//
+// impl Serialize for S {
+//    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+//        where S: Serializer
+//    {
+//        if self.first.get() {
+//            self.first.set(false);
+//            ().serialize(serializer)
+//        } else {
+//            0.serialize(serializer)
+//        }
+//    }
+// }
+//
+
+struct UnsafeVecWriter(*mut u8);
+
+impl Write for UnsafeVecWriter {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        unsafe {
+            ptr::copy_nonoverlapping(buf.as_ptr(), self.0, buf.len());
+            self.0 = self.0.offset(buf.len() as isize);
+        }
+        Ok(buf.len())
+    }
+    fn flush(&mut self) -> io::Result<()> { Ok(()) }
+}
+
+struct SizeCounter(usize);
+
+impl<'a> Write for SizeCounter {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        self.0 += buf.len();
+        Ok(buf.len())
+    }
+    fn flush(&mut self) -> io::Result<()> { Ok(()) }
+}
+
+fn serialize_fast<T: Serialize>(vec: &mut Vec<u8>, e: &T) {
+    // manually counting the size is faster than vec.reserve(bincode::serialized_size(&e) as usize) for some reason
+    let mut size = SizeCounter(0);
+    bincode::serialize_into(&mut size,e , bincode::Infinite).unwrap();
+    vec.reserve(size.0);
+
+    let old_len = vec.len();
+    let ptr = unsafe { vec.as_mut_ptr().offset(old_len as isize) };
+    let mut w = UnsafeVecWriter(ptr);
+    bincode::serialize_into(&mut w, e, bincode::Infinite).unwrap();
+
+    // fix up the length
+    unsafe { vec.set_len(old_len + size.0); }
+
+    // make sure we wrote the right amount
+    debug_assert!(((w.0 as usize) - (vec.as_ptr() as usize)) == vec.len());
+}
+
+#[derive(Clone, Debug)]
+pub struct SaveState {
+    dl_len: usize,
+    clip_stack_len: usize,
+    next_clip_id: u64,
+}
+
 #[derive(Clone)]
 pub struct DisplayListBuilder {
     pub data: Vec<u8>,
     pub pipeline_id: PipelineId,
     clip_stack: Vec<ClipAndScrollInfo>,
-    // FIXME: audit whether fast hashers (FNV?) are safe here
-    glyphs: FastHashMap<(FontInstanceKey, ColorF), FastHashSet<GlyphIndex>>,
     next_clip_id: u64,
     builder_start_time: u64,
 
     /// The size of the content of this display list. This is used to allow scrolling
     /// outside the bounds of the display list items themselves.
     content_size: LayoutSize,
+    save_state: Option<SaveState>,
 }
 
 impl DisplayListBuilder {
     pub fn new(pipeline_id: PipelineId, content_size: LayoutSize) -> DisplayListBuilder {
         Self::with_capacity(pipeline_id, content_size, 0)
     }
 
     pub fn with_capacity(
@@ -514,76 +562,105 @@ impl DisplayListBuilder {
         const FIRST_CLIP_ID: u64 = 1;
 
         DisplayListBuilder {
             data: Vec::with_capacity(capacity),
             pipeline_id,
             clip_stack: vec![
                 ClipAndScrollInfo::simple(ClipId::root_scroll_node(pipeline_id)),
             ],
-            glyphs: FastHashMap::default(),
             next_clip_id: FIRST_CLIP_ID,
             builder_start_time: start_time,
             content_size,
+            save_state: None,
         }
     }
 
+    /// Saves the current display list state, so it may be `restore()`'d.
+    ///
+    /// # Conditions:
+    /// 
+    /// * Doesn't support popping clips that were pushed before the save.
+    /// * Doesn't support nested saves.
+    /// * Must call `clear_save()` if the restore becomes unnecessary.
+    pub fn save(&mut self) {
+        assert!(self.save_state.is_none(), "DisplayListBuilder doesn't support nested saves");
+
+        self.save_state = Some(SaveState {
+            clip_stack_len: self.clip_stack.len(),
+            dl_len: self.data.len(),
+            next_clip_id: self.next_clip_id,
+        });
+    }
+
+    /// Restores the state of the builder to when `save()` was last called.
+    pub fn restore(&mut self) {
+        let state = self.save_state.take().expect("No save to restore DisplayListBuilder from");
+
+        self.clip_stack.truncate(state.clip_stack_len);
+        self.data.truncate(state.dl_len);
+        self.next_clip_id = state.next_clip_id;
+    }
+
+    /// Discards the builder's save (indicating the attempted operation was successful).
+    pub fn clear_save(&mut self) {
+        self.save_state.take().expect("No save to clear in DisplayListBuilder");
+    }
+
     pub fn print_display_list(&mut self) {
         let mut temp = BuiltDisplayList::default();
         ::std::mem::swap(&mut temp.data, &mut self.data);
 
         {
             let mut iter = BuiltDisplayListIter::new(&temp);
             while let Some(item) = iter.next() {
                 println!("{:?}", item.display_item());
             }
         }
 
         self.data = temp.data;
     }
 
     fn push_item(&mut self, item: SpecificDisplayItem, info: &LayoutPrimitiveInfo) {
-        bincode::serialize_into(
+        serialize_fast(
             &mut self.data,
             &DisplayItem {
                 item,
                 clip_and_scroll: *self.clip_stack.last().unwrap(),
                 info: *info,
             },
-            bincode::Infinite,
-        ).unwrap();
+        )
     }
 
     fn push_new_empty_item(&mut self, item: SpecificDisplayItem) {
         let info = LayoutPrimitiveInfo::new(LayoutRect::zero());
-        bincode::serialize_into(
+        serialize_fast(
             &mut self.data,
             &DisplayItem {
                 item,
                 clip_and_scroll: *self.clip_stack.last().unwrap(),
                 info,
-            },
-            bincode::Infinite,
-        ).unwrap();
+            }
+        )
     }
 
     fn push_iter<I>(&mut self, iter: I)
     where
         I: IntoIterator,
         I::IntoIter: ExactSizeIterator,
         I::Item: Serialize,
     {
         let iter = iter.into_iter();
         let len = iter.len();
         let mut count = 0;
 
-        bincode::serialize_into(&mut self.data, &len, bincode::Infinite).unwrap();
+        serialize_fast(&mut self.data, &len);
         for elem in iter {
             count += 1;
-            bincode::serialize_into(&mut self.data, &elem, bincode::Infinite).unwrap();
+            serialize_fast(&mut self.data, &elem);
         }
 
         debug_assert_eq!(len, count);
     }
 
     pub fn push_rect(&mut self, info: &LayoutPrimitiveInfo, color: ColorF) {
         let item = SpecificDisplayItem::Rectangle(RectangleDisplayItem { color });
         self.push_item(item, info);
@@ -659,39 +736,19 @@ impl DisplayListBuilder {
             color,
             font_key,
             glyph_options,
         });
 
         for split_glyphs in glyphs.chunks(MAX_TEXT_RUN_LENGTH) {
             self.push_item(item, info);
             self.push_iter(split_glyphs);
-
-            // Remember that we've seen these glyphs
-            self.cache_glyphs(
-                font_key,
-                color,
-                split_glyphs.iter().map(|glyph| glyph.index),
-            );
         }
     }
 
-    fn cache_glyphs<I: Iterator<Item = GlyphIndex>>(
-        &mut self,
-        font_key: FontInstanceKey,
-        color: ColorF,
-        glyphs: I,
-    ) {
-        let font_glyphs = self.glyphs
-            .entry((font_key, color))
-            .or_insert(FastHashSet::default());
-
-        font_glyphs.extend(glyphs);
-    }
-
     // Gradients can be defined with stops outside the range of [0, 1]
     // when this happens the gradient needs to be normalized by adjusting
     // the gradient stops and gradient line into an equivalent gradient
     // with stops in the range [0, 1]. this is done by moving the beginning
     // of the gradient line to where stop[0] and the end of the gradient line
     // to stop[n-1]. this function adjusts the stops in place, and returns
     // the amount to adjust the gradient line start and stop
     fn normalize_stops(stops: &mut Vec<GradientStop>, extend_mode: ExtendMode) -> (f32, f32) {
@@ -964,17 +1021,17 @@ impl DisplayListBuilder {
         }
         self.push_new_empty_item(SpecificDisplayItem::SetGradientStops);
         self.push_iter(stops);
     }
 
     fn generate_clip_id(&mut self, id: Option<ClipId>) -> ClipId {
         id.unwrap_or_else(|| {
             self.next_clip_id += 1;
-            ClipId::Clip(self.next_clip_id - 1, 0, self.pipeline_id)
+            ClipId::Clip(self.next_clip_id - 1, self.pipeline_id)
         })
     }
 
     pub fn define_scroll_frame<I>(
         &mut self,
         id: Option<ClipId>,
         content_rect: LayoutRect,
         clip_rect: LayoutRect,
@@ -1099,76 +1156,50 @@ impl DisplayListBuilder {
     }
 
     pub fn push_clip_and_scroll_info(&mut self, info: ClipAndScrollInfo) {
         self.clip_stack.push(info);
     }
 
     pub fn pop_clip_id(&mut self) {
         self.clip_stack.pop();
+        if let Some(save_state) = self.save_state.as_ref() {
+            assert!(self.clip_stack.len() >= save_state.clip_stack_len,
+                    "Cannot pop clips that were pushed before the DisplayListBuilder save.");
+        }
         assert!(self.clip_stack.len() > 0);
     }
 
     pub fn push_iframe(&mut self, info: &LayoutPrimitiveInfo, pipeline_id: PipelineId) {
         let item = SpecificDisplayItem::Iframe(IframeDisplayItem {
             pipeline_id: pipeline_id,
         });
         self.push_item(item, info);
     }
 
-    // Don't use this function. It will go away.
-    //
-    // We're using this method as a hack in Gecko to retain parts sub-parts of display
-    // lists so that we can regenerate them without building Gecko display items. WebRender
-    // will replace references to the root scroll frame id with the current scroll frame
-    // id.
-    pub fn push_nested_display_list(&mut self, built_display_list: &BuiltDisplayList) {
-        self.push_new_empty_item(SpecificDisplayItem::PushNestedDisplayList);
-
-        // Need to read out all the glyph data to update the cache
-        for (font_key, color, glyphs) in built_display_list.glyphs() {
-            self.cache_glyphs(font_key, color, built_display_list.get(glyphs));
-        }
-
-        // Only append the actual items, not any caches
-        self.data.extend_from_slice(built_display_list.item_slice());
-        self.push_new_empty_item(SpecificDisplayItem::PopNestedDisplayList);
-    }
-
     pub fn push_shadow(&mut self, info: &LayoutPrimitiveInfo, shadow: Shadow) {
         self.push_item(SpecificDisplayItem::PushShadow(shadow), info);
     }
 
     pub fn pop_shadow(&mut self) {
         self.push_new_empty_item(SpecificDisplayItem::PopShadow);
     }
 
-    pub fn finalize(mut self) -> (PipelineId, LayoutSize, BuiltDisplayList) {
-        let glyph_offset = self.data.len();
-
-        // Want to use self.push_iter, so can't borrow self
-        let glyphs = ::std::mem::replace(&mut self.glyphs, FastHashMap::default());
-
-        // Append glyph data to the end
-        for ((font_key, color), sub_glyphs) in glyphs {
-            bincode::serialize_into(&mut self.data, &font_key, bincode::Infinite).unwrap();
-            bincode::serialize_into(&mut self.data, &color, bincode::Infinite).unwrap();
-            self.push_iter(sub_glyphs);
-        }
+    pub fn finalize(self) -> (PipelineId, LayoutSize, BuiltDisplayList) {
+        assert!(self.save_state.is_none(), "Finalized DisplayListBuilder with a pending save");
 
         let end_time = precise_time_ns();
 
 
         (
             self.pipeline_id,
             self.content_size,
             BuiltDisplayList {
                 descriptor: BuiltDisplayListDescriptor {
                     builder_start_time: self.builder_start_time,
                     builder_finish_time: end_time,
                     send_start_time: 0,
-                    glyph_offset,
                 },
                 data: self.data,
             },
         )
     }
 }
--- a/gfx/webrender_api/src/font.rs
+++ b/gfx/webrender_api/src/font.rs
@@ -132,16 +132,26 @@ impl FontRenderMode {
         match (self, other) {
             (FontRenderMode::Bitmap, _) | (_, FontRenderMode::Bitmap) => FontRenderMode::Bitmap,
             (FontRenderMode::Subpixel, _) | (_, FontRenderMode::Mono) => other,
             _ => self,
         }
     }
 }
 
+impl SubpixelDirection {
+    // Limit the subpixel direction to what is supported by the render mode.
+    pub fn limit_by(self, render_mode: FontRenderMode) -> SubpixelDirection {
+        match render_mode {
+            FontRenderMode::Mono | FontRenderMode::Bitmap => SubpixelDirection::None,
+            FontRenderMode::Alpha | FontRenderMode::Subpixel => self,
+        }
+    }
+}
+
 #[repr(u8)]
 #[derive(Hash, Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize)]
 pub enum SubpixelOffset {
     Zero = 0,
     Quarter = 1,
     Half = 2,
     ThreeQuarters = 3,
 }
@@ -191,26 +201,111 @@ impl Hash for FontVariation {
 #[derive(Clone, Copy, Debug, Deserialize, Hash, Eq, PartialEq, PartialOrd, Ord, Serialize)]
 pub struct GlyphOptions {
     pub render_mode: FontRenderMode,
 }
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Hash, Eq, PartialEq, PartialOrd, Ord, Serialize)]
 pub struct FontInstanceOptions {
-    pub render_mode: Option<FontRenderMode>,
+    pub render_mode: FontRenderMode,
+    pub subpx_dir: SubpixelDirection,
     pub synthetic_italics: bool,
 }
 
+impl Default for FontInstanceOptions {
+    fn default() -> FontInstanceOptions {
+        FontInstanceOptions {
+            render_mode: FontRenderMode::Subpixel,
+            subpx_dir: SubpixelDirection::Horizontal,
+            synthetic_italics: false,
+        }
+    }
+}
+
+#[cfg(target_os = "windows")]
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Deserialize, Hash, Eq, PartialEq, PartialOrd, Ord, Serialize)]
+pub struct FontInstancePlatformOptions {
+    pub use_embedded_bitmap: bool,
+    pub force_gdi_rendering: bool,
+}
+
+#[cfg(target_os = "windows")]
+impl Default for FontInstancePlatformOptions {
+    fn default() -> FontInstancePlatformOptions {
+        FontInstancePlatformOptions {
+            use_embedded_bitmap: false,
+            force_gdi_rendering: false,
+        }
+    }
+}
+
+#[cfg(target_os = "macos")]
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Hash, Eq, PartialEq, PartialOrd, Ord, Serialize)]
 pub struct FontInstancePlatformOptions {
-    // These are currently only used on windows for dwrite fonts.
-    pub use_embedded_bitmap: bool,
-    pub force_gdi_rendering: bool,
+    pub unused: u32,
+}
+
+#[cfg(target_os = "macos")]
+impl Default for FontInstancePlatformOptions {
+    fn default() -> FontInstancePlatformOptions {
+        FontInstancePlatformOptions {
+            unused: 0,
+        }
+    }
+}
+
+pub const FONT_FORCE_AUTOHINT: u16  = 0b1;
+pub const FONT_NO_AUTOHINT: u16     = 0b10;
+pub const FONT_EMBEDDED_BITMAP: u16 = 0b100;
+pub const FONT_EMBOLDEN: u16        = 0b1000;
+pub const FONT_VERTICAL_LAYOUT: u16 = 0b10000;
+pub const FONT_SUBPIXEL_BGR: u16    = 0b100000;
+
+#[cfg(not(any(target_os = "macos", target_os = "windows")))]
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize)]
+pub enum FontLCDFilter {
+    None,
+    Default,
+    Light,
+    Legacy,
+}
+
+#[cfg(not(any(target_os = "macos", target_os = "windows")))]
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize)]
+pub enum FontHinting {
+    None,
+    Mono,
+    Light,
+    Normal,
+    LCD,
+}
+
+#[cfg(not(any(target_os = "macos", target_os = "windows")))]
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Deserialize, Hash, Eq, PartialEq, PartialOrd, Ord, Serialize)]
+pub struct FontInstancePlatformOptions {
+    pub flags: u16,
+    pub lcd_filter: FontLCDFilter,
+    pub hinting: FontHinting,
+}
+
+#[cfg(not(any(target_os = "macos", target_os = "windows")))]
+impl Default for FontInstancePlatformOptions {
+    fn default() -> FontInstancePlatformOptions {
+        FontInstancePlatformOptions {
+            flags: 0,
+            lcd_filter: FontLCDFilter::Default,
+            hinting: FontHinting::LCD,
+        }
+    }
 }
 
 #[derive(Clone, Hash, PartialEq, Eq, Debug, Deserialize, Serialize, Ord, PartialOrd)]
 pub struct FontInstance {
     pub font_key: FontKey,
     // The font size is in *device* pixels, not logical pixels.
     // It is stored as an Au since we need sub-pixel sizes, but
     // can't store as a f32 due to use of this type as a hash key.
@@ -224,36 +319,23 @@ pub struct FontInstance {
     pub variations: Vec<FontVariation>,
     pub synthetic_italics: bool,
 }
 
 impl FontInstance {
     pub fn new(
         font_key: FontKey,
         size: Au,
-        mut color: ColorF,
+        color: ColorF,
         render_mode: FontRenderMode,
         subpx_dir: SubpixelDirection,
         platform_options: Option<FontInstancePlatformOptions>,
         variations: Vec<FontVariation>,
         synthetic_italics: bool,
     ) -> FontInstance {
-        // In alpha/mono mode, the color of the font is irrelevant.
-        // Forcing it to black in those cases saves rasterizing glyphs
-        // of different colors when not needed.
-        match render_mode {
-            FontRenderMode::Alpha | FontRenderMode::Mono => {
-                color = ColorF::new(0.0, 0.0, 0.0, 1.0);
-            }
-            FontRenderMode::Bitmap => {
-                color = ColorF::new(1.0, 1.0, 1.0, 1.0);
-            }
-            FontRenderMode::Subpixel => {}
-        }
-
         FontInstance {
             font_key,
             size,
             color: color.into(),
             render_mode,
             subpx_dir,
             platform_options,
             variations,
--- a/gfx/webrender_api/src/lib.rs
+++ b/gfx/webrender_api/src/lib.rs
@@ -8,17 +8,16 @@
 extern crate app_units;
 extern crate bincode;
 #[macro_use]
 extern crate bitflags;
 extern crate byteorder;
 #[cfg(feature = "nightly")]
 extern crate core;
 extern crate euclid;
-extern crate fxhash;
 #[macro_use]
 extern crate heapsize;
 #[cfg(feature = "ipc")]
 extern crate ipc_channel;
 #[macro_use]
 extern crate serde;
 extern crate time;
 
@@ -41,13 +40,9 @@ mod font;
 mod image;
 
 pub use api::*;
 pub use color::*;
 pub use display_item::*;
 pub use display_list::*;
 pub use font::*;
 pub use image::*;
-use std::collections::{HashMap, HashSet};
-use std::hash::BuildHasherDefault;
 pub use units::*;
-type FastHashMap<K, V> = HashMap<K, V, BuildHasherDefault<fxhash::FxHasher>>;
-type FastHashSet<T> = HashSet<T, BuildHasherDefault<fxhash::FxHasher>>;
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -1,19 +1,19 @@
 [package]
 name = "webrender_bindings"
 version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
-webrender_api = {path = "../webrender_api", version = "0.52.0"}
+webrender_api = {path = "../webrender_api", version = "0.52.1"}
 bincode = "0.8"
 rayon = "0.8"
 thread_profiler = "0.1.1"
 euclid = "0.15"
 app_units = "0.5.6"
 gleam = "0.4"
 
 [dependencies.webrender]
 path = "../webrender"
-version = "0.52.0"
+version = "0.52.1"
 default-features = false
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -54,16 +54,35 @@ enum class ExternalImageType : uint32_t 
   Texture2DArrayHandle = 1,
   TextureRectHandle = 2,
   TextureExternalHandle = 3,
   ExternalBuffer = 4,
 
   Sentinel /* this must be last for serialization purposes. */
 };
 
+enum class FontHinting : uint8_t {
+  None = 0,
+  Mono = 1,
+  Light = 2,
+  Normal = 3,
+  LCD = 4,
+
+  Sentinel /* this must be last for serialization purposes. */
+};
+
+enum class FontLCDFilter : uint8_t {
+  None = 0,
+  Default = 1,
+  Light = 2,
+  Legacy = 3,
+
+  Sentinel /* this must be last for serialization purposes. */
+};
+
 enum class FontRenderMode : uint32_t {
   Mono = 0,
   Alpha = 1,
   Subpixel = 2,
   Bitmap = 3,
 
   Sentinel /* this must be last for serialization purposes. */
 };
@@ -247,24 +266,21 @@ typedef LayerSize LayoutSize;
 // items.
 struct BuiltDisplayListDescriptor {
   // The first IPC time stamp: before any work has been done
   uint64_t builder_start_time;
   // The second IPC time stamp: after serialization
   uint64_t builder_finish_time;
   // The third IPC time stamp: just before sending
   uint64_t send_start_time;
-  // The offset where DisplayItems stop and the Glyph list starts
-  size_t glyph_offset;
 
   bool operator==(const BuiltDisplayListDescriptor& aOther) const {
     return builder_start_time == aOther.builder_start_time &&
            builder_finish_time == aOther.builder_finish_time &&
-           send_start_time == aOther.send_start_time &&
-           glyph_offset == aOther.glyph_offset;
+           send_start_time == aOther.send_start_time;
   }
 };
 
 struct WrVecU8 {
   uint8_t *data;
   size_t length;
   size_t capacity;
 
@@ -747,16 +763,36 @@ struct FontInstancePlatformOptions {
   bool force_gdi_rendering;
 
   bool operator==(const FontInstancePlatformOptions& aOther) const {
     return use_embedded_bitmap == aOther.use_embedded_bitmap &&
            force_gdi_rendering == aOther.force_gdi_rendering;
   }
 };
 
+struct FontInstancePlatformOptions {
+  uint32_t unused;
+
+  bool operator==(const FontInstancePlatformOptions& aOther) const {
+    return unused == aOther.unused;
+  }
+};
+
+struct FontInstancePlatformOptions {
+  uint16_t flags;
+  FontLCDFilter lcd_filter;
+  FontHinting hinting;
+
+  bool operator==(const FontInstancePlatformOptions& aOther) const {
+    return flags == aOther.flags &&
+           lcd_filter == aOther.lcd_filter &&
+           hinting == aOther.hinting;
+  }
+};
+
 /* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen.
  * To generate this file:
  *   1. Get the latest cbindgen using `cargo install --force cbindgen`
  *      a. Alternatively, you can clone `https://github.com/rlhunt/cbindgen` and use a tagged release
  *   2. Run `rustup run nightly cbindgen toolkit/library/rust/ --crate webrender_bindings -o gfx/webrender_bindings/webrender_ffi_generated.h`
  */
 
 extern void gfx_critical_note(const char *aMsg);
--- a/toolkit/library/gtest/rust/Cargo.lock
+++ b/toolkit/library/gtest/rust/Cargo.lock
@@ -1579,17 +1579,17 @@ source = "registry+https://github.com/ru
 dependencies = [
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender"
-version = "0.52.0"
+version = "0.52.1"
 dependencies = [
  "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-foundation 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-graphics 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-text 7.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1601,49 +1601,48 @@ dependencies = [
  "gleam 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
  "plane-split 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
- "webrender_api 0.52.0",
+ "webrender_api 0.52.1",
 ]
 
 [[package]]
 name = "webrender_api"
-version = "0.52.0"
+version = "0.52.1"
 dependencies = [
  "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-foundation 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-graphics 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "dwrote 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender_bindings"
 version = "0.1.0"
 dependencies = [
  "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "gleam 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "webrender 0.52.0",
- "webrender_api 0.52.0",
+ "webrender 0.52.1",
+ "webrender_api 0.52.1",
 ]
 
 [[package]]
 name = "winapi"
 version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
--- a/toolkit/library/rust/Cargo.lock
+++ b/toolkit/library/rust/Cargo.lock
@@ -1591,17 +1591,17 @@ source = "registry+https://github.com/ru
 dependencies = [
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender"
-version = "0.52.0"
+version = "0.52.1"
 dependencies = [
  "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-foundation 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-graphics 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-text 7.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1613,49 +1613,48 @@ dependencies = [
  "gleam 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
  "plane-split 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
- "webrender_api 0.52.0",
+ "webrender_api 0.52.1",
 ]
 
 [[package]]
 name = "webrender_api"
-version = "0.52.0"
+version = "0.52.1"
 dependencies = [
  "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-foundation 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-graphics 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "dwrote 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender_bindings"
 version = "0.1.0"
 dependencies = [
  "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "gleam 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "webrender 0.52.0",
- "webrender_api 0.52.0",
+ "webrender 0.52.1",
+ "webrender_api 0.52.1",
 ]
 
 [[package]]
 name = "winapi"
 version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]