Bug 1424280 - Update webrender to commit f9bc4a5c263e707e3498bea47d3ec9096cc3d099. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Sun, 10 Dec 2017 13:48:41 -0500
changeset 710493 ffe2fd92265f32f25b16c6d27b58b1a335d530be
parent 710492 7c6b568981b8db4b9cb8a04f3f0d3dc2fcc575bc
child 710494 4d3325f124c3ea932642fe8835a2909050e9bf48
push id 92835
push user kgupta@mozilla.com
push date Sun, 10 Dec 2017 18:58:58 +0000
reviewers jrmuizel
bugs 1424280
milestone 59.0a1
Bug 1424280 - Update webrender to commit f9bc4a5c263e707e3498bea47d3ec9096cc3d099. r?jrmuizel This includes the re-generated FFI header. MozReview-Commit-ID: 6fvUNwu8ueT
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/animation.rs
gfx/webrender/examples/common/boilerplate.rs
gfx/webrender/examples/document.rs
gfx/webrender/examples/frame_output.rs
gfx/webrender/res/cs_text_run.glsl
gfx/webrender/res/gpu_cache_update.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/src/border.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/device.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_api/src/font.rs
gfx/webrender_bindings/Cargo.toml
gfx/webrender_bindings/webrender_ffi_generated.h
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-22f472f0adb02bd71c472e426e47182f2b218f6d
+f9bc4a5c263e707e3498bea47d3ec9096cc3d099
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,39 +1,39 @@
 [package]
 name = "webrender"
-version = "0.55.0"
+version = "0.56.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
 debugger = ["ws", "serde_json", "serde", "serde_derive", "image", "base64"]
 
 [dependencies]
-app_units = "0.5.6"
+app_units = "0.6"
 bincode = "0.9"
 byteorder = "1.0"
-euclid = "0.15.5"
+euclid = "0.16"
 fxhash = "0.2.1"
 gleam = "0.4.15"
 lazy_static = "1"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
 bitflags = "1.0"
 thread_profiler = "0.1.1"
-plane-split = "0.6"
-smallvec = "0.5"
+plane-split = "0.7"
+smallvec = "0.6"
 ws = { optional = true, version = "0.7.3" }
 serde_json = { optional = true, version = "1.0" }
 serde = { optional = true, version = "1.0" }
 serde_derive = { optional = true, version = "1.0" }
 image = { optional = true, version = "0.17" }
 base64 = { optional = true, version = "0.3.0" }
 
 [dev-dependencies]
--- a/gfx/webrender/examples/animation.rs
+++ b/gfx/webrender/examples/animation.rs
@@ -14,17 +14,17 @@ extern crate euclid;
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
-use euclid::Radians;
+use euclid::Angle;
 use webrender::api::*;
 
 struct App {
     property_key: PropertyBindingKey<LayoutTransform>,
     opacity_key: PropertyBindingKey<f32>,
     transform: LayoutTransform,
     opacity: f32,
 }
@@ -85,17 +85,17 @@ impl Example for App {
                     glutin::VirtualKeyCode::X => (0.0, 0.0, 0.0, 0.1),
                     _ => return false,
                 };
                 // Update the transform based on the keyboard input and push it to
                 // webrender using the generate_frame API. This will recomposite with
                 // the updated transform.
                 self.opacity += delta_opacity;
                 let new_transform = self.transform
-                    .pre_rotate(0.0, 0.0, 1.0, Radians::new(angle))
+                    .pre_rotate(0.0, 0.0, 1.0, Angle::radians(angle))
                     .post_translate(LayoutVector3D::new(offset_x, offset_y, 0.0));
                 api.generate_frame(
                     document_id,
                     Some(DynamicProperties {
                         transforms: vec![
                             PropertyValue {
                                 key: self.property_key,
                                 value: new_transform,
--- a/gfx/webrender/examples/common/boilerplate.rs
+++ b/gfx/webrender/examples/common/boilerplate.rs
@@ -74,17 +74,17 @@ pub trait Example {
         document_id: DocumentId,
     );
     fn on_event(&mut self, glutin::Event, &RenderApi, DocumentId) -> bool {
         false
     }
     fn get_image_handlers(
         &mut self,
         _gl: &gl::Gl,
-    ) -> (Option<Box<webrender::ExternalImageHandler>>, 
+    ) -> (Option<Box<webrender::ExternalImageHandler>>,
           Option<Box<webrender::OutputImageHandler>>) {
         (None, None)
     }
     fn draw_custom(&self, _gl: &gl::Gl) {
     }
 }
 
 pub fn main_wrapper<E: Example>(
@@ -109,37 +109,39 @@ pub fn main_wrapper<E: Example>(
         })
         .build()
         .unwrap();
 
     unsafe {
         window.make_current().ok();
     }
 
-    let gl = match gl::GlType::default() {
-        gl::GlType::Gl => unsafe {
+    let gl = match window.get_api() {
+        glutin::Api::OpenGl => unsafe {
             gl::GlFns::load_with(|symbol| window.get_proc_address(symbol) as *const _)
         },
-        gl::GlType::Gles => unsafe {
+        glutin::Api::OpenGlEs => unsafe {
             gl::GlesFns::load_with(|symbol| window.get_proc_address(symbol) as *const _)
         },
+        glutin::Api::WebGl => unimplemented!(),
     };
 
     println!("OpenGL version {}", gl.get_string(gl::VERSION));
     println!("Shader resource path: {:?}", res_path);
     let device_pixel_ratio = window.hidpi_factor();
     println!("Device pixel ratio: {}", device_pixel_ratio);
 
     println!("Loading shaders...");
     let opts = webrender::RendererOptions {
         resource_override_path: res_path,
         debug: true,
         precache_shaders: E::PRECACHE_SHADERS,
         device_pixel_ratio,
         clear_color: Some(ColorF::new(0.3, 0.0, 0.0, 1.0)),
+        //scatter_gpu_cache_updates: false,
         ..options.unwrap_or(webrender::RendererOptions::default())
     };
 
     let framebuffer_size = {
         let (width, height) = window.get_inner_size_pixels().unwrap();
         DeviceUintSize::new(width, height)
     };
     let notifier = Box::new(Notifier::new(window.create_window_proxy()));
@@ -154,17 +156,17 @@ pub fn main_wrapper<E: Example>(
     }
 
     if let Some(external_image_handler) = external {
         renderer.set_external_image_handler(external_image_handler);
     }
 
     let epoch = Epoch(0);
     let pipeline_id = PipelineId(0, 0);
-    let layout_size = framebuffer_size.to_f32() / euclid::ScaleFactor::new(device_pixel_ratio);
+    let layout_size = framebuffer_size.to_f32() / euclid::TypedScale::new(device_pixel_ratio);
     let mut builder = DisplayListBuilder::new(pipeline_id, layout_size);
     let mut resources = ResourceUpdates::new();
 
     example.render(
         &api,
         &mut builder,
         &mut resources,
         framebuffer_size,
--- a/gfx/webrender/examples/document.rs
+++ b/gfx/webrender/examples/document.rs
@@ -6,17 +6,17 @@ extern crate euclid;
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::Example;
-use euclid::ScaleFactor;
+use euclid::TypedScale;
 use webrender::api::*;
 
 // This example creates multiple documents overlapping each other with
 // specified layer indices.
 
 struct Document {
     id: DocumentId,
     pipeline_id: PipelineId,
@@ -72,17 +72,17 @@ impl App {
                 bounds,
                 1.0
             );
             api.set_root_pipeline(document_id, pipeline_id);
 
             self.documents.push(Document {
                 id: document_id,
                 pipeline_id,
-                content_rect: bounds.to_f32() / ScaleFactor::new(device_pixel_ratio),
+                content_rect: bounds.to_f32() / TypedScale::new(device_pixel_ratio),
                 color,
             });
         }
     }
 }
 
 impl Example for App {
     fn render(
--- a/gfx/webrender/examples/frame_output.rs
+++ b/gfx/webrender/examples/frame_output.rs
@@ -8,17 +8,17 @@ extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
 use gleam::gl;
 use webrender::api::*;
-use euclid::ScaleFactor;
+use euclid::TypedScale;
 
 // This example demonstrates using the frame output feature to copy
 // the output of a WR framebuffer to a custom texture.
 
 #[derive(Debug)]
 struct Document {
     id: DocumentId,
     pipeline_id: PipelineId,
@@ -88,17 +88,17 @@ impl App {
         let bounds = DeviceUintRect::new(DeviceUintPoint::zero(), framebuffer_size);
         let document_id = api.add_document(framebuffer_size, layer);
 
         api.set_root_pipeline(document_id, pipeline_id);
 
         let document = Document {
             id: document_id,
             pipeline_id,
-            content_rect: bounds.to_f32() / ScaleFactor::new(device_pixel_ratio),
+            content_rect: bounds.to_f32() / TypedScale::new(device_pixel_ratio),
             color,
         };
 
         let info = LayoutPrimitiveInfo::new(document.content_rect);
         let mut builder = DisplayListBuilder::new(
             document.pipeline_id,
             document.content_rect.size,
         );
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -13,20 +13,21 @@ flat varying vec4 vColor;
 // as text-shadow.
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
+    int subpx_dir = prim.user_data2;
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
                               glyph_index,
-                              text.subpx_dir);
+                              subpx_dir);
 
     GlyphResource res = fetch_glyph_resource(resource_address);
 
     // Glyph size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 glyph_size = res.uv_rect.zw - res.uv_rect.xy;
     vec2 glyph_pos = res.offset + glyph_size * aPosition.xy;
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/gpu_cache_update.glsl
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include base
+
+varying vec4 vData; // Written by the vertex stage below and read back by the fragment stage.
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aValue; // Per-vertex payload, forwarded unchanged through vData.
+in vec2 aPosition; // NOTE(review): presumably normalized to [0, 1] - confirm against the caller.
+
+void main() {
+    vData = aValue;
+    gl_Position = vec4(aPosition * 2.0 - 1.0, 0.0, 1.0); // Remaps aPosition so that 0 -> -1 and 1 -> +1.
+    gl_PointSize = 1.0; // Point size of 1.0 for point primitives.
+}
+
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+out vec4 oValue;
+
+void main() {
+    oValue = vData; // Emit the forwarded value as-is; no sampling or blending math here.
+}
+#endif //WR_FRAGMENT_SHADER
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -728,22 +728,21 @@ ImageResource fetch_image_resource_direc
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     return ImageResource(data[0], data[1].x);
 }
 
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
-    int subpx_dir;
 };
 
 TextRun fetch_text_run(int address) {
     vec4 data[3] = fetch_from_resource_cache_3(address);
-    return TextRun(data[0], data[1], data[2].xy, int(data[2].z));
+    return TextRun(data[0], data[1], data[2].xy);
 }
 
 struct Image {
     vec4 stretch_size_and_tile_spacing;  // Size of the actual image and amount of space between
                                          //     tiled instances of this image.
     vec4 sub_rect;                          // If negative, ignored.
 };
 
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -3,26 +3,31 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared
 
 flat varying vec4 vColor;
 varying vec3 vUv;
 flat varying vec4 vUvBorder;
 
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+varying vec4 vUvClip;
+#endif
+
 #ifdef WR_VERTEX_SHADER
 
 #define MODE_ALPHA              0
 #define MODE_SUBPX_CONST_COLOR  1
 #define MODE_SUBPX_PASS0        2
 #define MODE_SUBPX_PASS1        3
 #define MODE_SUBPX_BG_PASS0     4
 #define MODE_SUBPX_BG_PASS1     5
 #define MODE_SUBPX_BG_PASS2     6
-#define MODE_COLOR_BITMAP       7
+#define MODE_BITMAP             7
+#define MODE_COLOR_BITMAP       8
 
 VertexInfo write_text_vertex(vec2 clamped_local_pos,
                              RectWithSize local_clip_rect,
                              float z,
                              Layer layer,
                              PictureTask task,
                              RectWithSize snap_rect) {
     // Transform the current vertex to world space.
@@ -53,20 +58,21 @@ VertexInfo write_text_vertex(vec2 clampe
 }
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
+    int subpx_dir = prim.user_data2;
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
                               glyph_index,
-                              text.subpx_dir);
+                              subpx_dir);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     // Transform from local space to glyph space.
     mat2 transform = mat2(prim.layer.transform) * uDevicePixelRatio;
 
     // Compute the glyph rect in glyph space.
     RectWithSize glyph_rect = RectWithSize(res.offset + transform * (text.offset + glyph.offset),
@@ -107,29 +113,31 @@ void main(void) {
                                       prim.local_clip_rect,
                                       prim.z,
                                       prim.layer,
                                       prim.task,
                                       glyph_rect);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     vec2 f = (transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
+    vUvClip = vec4(f, 1.0 - f);
 #else
     vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
 #ifdef WR_FEATURE_SUBPX_BG_PASS1
     vColor = vec4(text.color.a) * text.bg_color;
 #else
     switch (uMode) {
         case MODE_ALPHA:
         case MODE_SUBPX_PASS1:
         case MODE_SUBPX_BG_PASS2:
+        case MODE_BITMAP:
             vColor = text.color;
             break;
         case MODE_SUBPX_CONST_COLOR:
         case MODE_SUBPX_PASS0:
         case MODE_SUBPX_BG_PASS0:
         case MODE_COLOR_BITMAP:
             vColor = vec4(text.color.a);
             break;
@@ -139,26 +147,29 @@ void main(void) {
     }
 #endif
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vUv = vec3(mix(st0, st1, f), res.layer);
-    vUvBorder = (res.uv_rect + vec4(0.499, 0.499, -0.499, -0.499)) / texture_size.xyxy;
+    vUvBorder = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
-    vec4 mask = texture(sColor0, vUv);
+    vec3 tc = vec3(clamp(vUv.xy, vUvBorder.xy, vUvBorder.zw), vUv.z);
+    vec4 mask = texture(sColor0, tc);
 
-    float alpha = float(all(lessThanEqual(vec4(vUvBorder.xy, vUv.xy), vec4(vUv.xy, vUvBorder.zw))));
-    alpha *= do_clip();
+    float alpha = do_clip();
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    alpha *= float(all(greaterThanEqual(vUvClip, vec4(0.0))));
+#endif
 
 #ifdef WR_FEATURE_SUBPX_BG_PASS1
     mask.rgb = vec3(mask.a) - mask.rgb;
 #endif
 
     oFragColor = vColor * mask * alpha;
 }
 #endif
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF};
+use api::{BorderRadius, BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF};
 use api::{LayerPoint, LayerRect};
 use api::{LayerPrimitiveInfo, LayerSize, NormalBorder, RepeatMode};
 use clip::ClipSource;
 use ellipse::Ellipse;
 use frame_builder::FrameBuilder;
 use gpu_cache::GpuDataRequest;
 use prim_store::{BrushAntiAliasMode, BrushSegmentDescriptor, BrushSegmentKind};
 use prim_store::{BorderPrimitiveCpu, PrimitiveContainer, TexelRect};
@@ -222,16 +222,58 @@ impl NormalBorderHelpers for NormalBorde
             BorderStyle::Groove |
             BorderStyle::Ridge |
             BorderStyle::Dashed |
             BorderStyle::Dotted => (BorderEdgeKind::Clip, width),
         }
     }
 }
 
+/// Scales all four corner radii down uniformly so that the two radii sharing
+/// any side of `info.rect` never add up to more than that side's length.
+///
+/// The top and bottom sides are checked against the rect width using the
+/// radius widths; the left and right sides are checked against the rect
+/// height using the radius heights. The smallest `side / sum` ratio among the
+/// overflowing sides is applied to every radius, which keeps the corner
+/// shapes proportional. Radii that already fit are left untouched.
+fn ensure_no_corner_overlap(radius: &mut BorderRadius, info: &LayerPrimitiveInfo) {
+    let size = info.rect.size;
+
+    // (sum of the two radii that share a side, length of that side)
+    let sides = [
+        (radius.top_left.width + radius.top_right.width, size.width), // top
+        (radius.bottom_left.width + radius.bottom_right.width, size.width), // bottom
+        (radius.top_left.height + radius.bottom_left.height, size.height), // left
+        (radius.top_right.height + radius.bottom_right.height, size.height), // right
+    ];
+
+    // Only sides whose radii overflow contribute; keep the tightest ratio.
+    let mut ratio = 1.0;
+    for &(sum, available) in &sides {
+        if available < sum {
+            ratio = f32::min(ratio, available / sum);
+        }
+    }
+
+    // Shrink every corner by the same factor so each keeps its aspect ratio.
+    if ratio < 1. {
+        let mut corners = [
+            &mut radius.top_left,
+            &mut radius.top_right,
+            &mut radius.bottom_left,
+            &mut radius.bottom_right,
+        ];
+        for corner in corners.iter_mut() {
+            corner.width *= ratio;
+            corner.height *= ratio;
+        }
+    }
+}
+
 impl FrameBuilder {
     fn add_normal_border_primitive(
         &mut self,
         info: &LayerPrimitiveInfo,
         border: &NormalBorder,
         widths: &BorderWidths,
         clip_and_scroll: ClipAndScrollInfo,
         corner_instances: [BorderCornerInstance; 4],
@@ -305,16 +347,19 @@ impl FrameBuilder {
         // the border with a few rectangles. This generally gives better batching, and
         // a GPU win in fragment shader time.
         // More importantly, the software (OSMesa) implementation we run tests on is
         // particularly slow at running our complex border shader, compared to the
         // rectangle shader. This has the effect of making some of our tests time
         // out more often on CI (the actual cause is simply too many Servo processes and
         // threads being run on CI at once).
 
+        let mut border = *border;
+        ensure_no_corner_overlap(&mut border.radius, &info);
+
         let radius = &border.radius;
         let left = &border.left;
         let right = &border.right;
         let top = &border.top;
         let bottom = &border.bottom;
 
         let corners = [
             border.get_corner(
@@ -473,17 +518,17 @@ impl FrameBuilder {
                     BorderCornerKind::None => {
                         corner_instances[i] = BorderCornerInstance::None;
                     }
                 }
             }
 
             self.add_normal_border_primitive(
                 info,
-                border,
+                &border,
                 widths,
                 clip_and_scroll,
                 corner_instances,
                 extra_clips,
             );
         }
     }
 }
@@ -875,9 +920,9 @@ impl ImageBorderSegment {
 
         ImageBorderSegment {
             geom_rect: rect,
             sub_rect,
             stretch_size: LayerSize::new(stretch_size_x, stretch_size_y),
             tile_spacing,
         }
     }
-}
+}
\ No newline at end of file
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -5,17 +5,17 @@
 use api::{BorderRadius, ClipMode, ComplexClipRegion, DeviceIntRect, ImageMask, ImageRendering};
 use api::{LayerPoint, LayerRect, LayerToWorldTransform, LayoutPoint, LayoutVector2D, LocalClip};
 use border::BorderCornerClipSource;
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use prim_store::{ClipData, ImageMaskData};
 use resource_cache::ResourceCache;
-use util::{MaxRect, calculate_screen_bounding_rect, extract_inner_rect_safe};
+use util::{MaxRect, MatrixHelpers, calculate_screen_bounding_rect, extract_inner_rect_safe};
 
 pub type ClipStore = FreeList<ClipSources>;
 pub type ClipSourcesHandle = FreeListHandle<ClipSources>;
 pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipSources>;
 
 #[derive(Clone, Debug)]
 pub struct ClipRegion {
     pub main: LayerRect,
@@ -246,18 +246,30 @@ impl ClipSources {
         !self.clips.is_empty()
     }
 
     pub fn get_screen_bounds(
         &self,
         transform: &LayerToWorldTransform,
         device_pixel_ratio: f32,
     ) -> (DeviceIntRect, Option<DeviceIntRect>) {
-        let screen_inner_rect =
-            calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_ratio);
+        // If this transform isn't axis aligned or has a perspective component, don't try to
+        // calculate the inner rectangle. The rectangle that we produce would include potentially
+        // clipped screen area.
+        // TODO(mrobinson): We should eventually try to calculate an inner region or some inner
+        // rectangle so that we can do screen inner rectangle optimizations for these kinds of
+        // clips.
+        let can_calculate_inner_rect =
+            transform.preserves_2d_axis_alignment() && !transform.has_perspective_component();
+        let screen_inner_rect = if can_calculate_inner_rect {
+            calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_ratio)
+        } else {
+            DeviceIntRect::zero()
+        };
+
         let screen_outer_rect = self.local_outer_rect.map(|outer_rect|
             calculate_screen_bounding_rect(transform, &outer_rect, device_pixel_ratio)
         );
 
         (screen_inner_rect, screen_outer_rect)
     }
 }
 
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
 use api::{ColorF, ImageFormat};
-use api::{DeviceIntRect, DeviceUintRect, DeviceUintSize};
+use api::{DeviceIntPoint, DeviceIntRect, DeviceUintRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::{FastHashMap, RenderTargetInfo};
 use smallvec::SmallVec;
 use std::cell::RefCell;
 use std::fs::File;
 use std::io::Read;
 use std::iter::repeat;
@@ -91,16 +91,17 @@ pub enum TextureFilter {
     Nearest,
     Linear,
 }
 
 #[derive(Debug)]
 pub enum VertexAttributeKind {
     F32,
     U8Norm,
+    U16Norm,
     I32,
     U16,
 }
 
 #[derive(Debug)]
 pub struct VertexAttribute {
     pub name: &'static str,
     pub count: u32,
@@ -244,16 +245,17 @@ pub trait FileWatcherHandler: Send {
     fn file_changed(&self, path: PathBuf);
 }
 
 impl VertexAttributeKind {
     fn size_in_bytes(&self) -> u32 {
         match *self {
             VertexAttributeKind::F32 => 4,
             VertexAttributeKind::U8Norm => 1,
+            VertexAttributeKind::U16Norm => 2,
             VertexAttributeKind::I32 => 4,
             VertexAttributeKind::U16 => 2,
         }
     }
 }
 
 impl VertexAttribute {
     fn size_in_bytes(&self) -> u32 {
@@ -287,16 +289,26 @@ impl VertexAttribute {
                     attr_index,
                     self.count as gl::GLint,
                     gl::UNSIGNED_BYTE,
                     true,
                     stride,
                     offset,
                 );
             }
+            VertexAttributeKind::U16Norm => {
+                gl.vertex_attrib_pointer(
+                    attr_index,
+                    self.count as gl::GLint,
+                    gl::UNSIGNED_SHORT,
+                    true,
+                    stride,
+                    offset,
+                );
+            }
             VertexAttributeKind::I32 => {
                 gl.vertex_attrib_i_pointer(
                     attr_index,
                     self.count as gl::GLint,
                     gl::INT,
                     stride,
                     offset,
                 );
@@ -317,49 +329,47 @@ impl VertexAttribute {
 impl VertexDescriptor {
     fn instance_stride(&self) -> u32 {
         self.instance_attributes
             .iter()
             .map(|attr| attr.size_in_bytes())
             .sum()
     }
 
-    fn bind(&self, gl: &gl::Gl, main: VBOId, instance: VBOId) {
-        main.bind(gl);
+    fn bind_attributes(
+        attributes: &[VertexAttribute],
+        start_index: usize,
+        divisor: u32,
+        gl: &gl::Gl,
+        vbo: VBOId,
+    ) {
+        vbo.bind(gl);
 
-        let vertex_stride: u32 = self.vertex_attributes
+        let stride: u32 = attributes
             .iter()
             .map(|attr| attr.size_in_bytes())
             .sum();
-        let mut vertex_offset = 0;
 
-        for (i, attr) in self.vertex_attributes.iter().enumerate() {
-            let attr_index = i as gl::GLuint;
-            attr.bind_to_vao(attr_index, 0, vertex_stride as gl::GLint, vertex_offset, gl);
-            vertex_offset += attr.size_in_bytes();
+        let mut offset = 0;
+        for (i, attr) in attributes.iter().enumerate() {
+            let attr_index = (start_index + i) as gl::GLuint;
+            attr.bind_to_vao(attr_index, divisor, stride as _, offset, gl);
+            offset += attr.size_in_bytes();
         }
+    }
+
+    fn bind(&self, gl: &gl::Gl, main: VBOId, instance: VBOId) {
+        Self::bind_attributes(&self.vertex_attributes, 0, 0, gl, main);
 
         if !self.instance_attributes.is_empty() {
-            instance.bind(gl);
-            let instance_stride = self.instance_stride();
-            let mut instance_offset = 0;
-
-            let base_attr = self.vertex_attributes.len() as u32;
-
-            for (i, attr) in self.instance_attributes.iter().enumerate() {
-                let attr_index = base_attr + i as u32;
-                attr.bind_to_vao(
-                    attr_index,
-                    1,
-                    instance_stride as gl::GLint,
-                    instance_offset,
-                    gl,
-                );
-                instance_offset += attr.size_in_bytes();
-            }
+            Self::bind_attributes(
+                &self.instance_attributes,
+                self.vertex_attributes.len(),
+                1, gl, instance,
+            );
         }
     }
 }
 
 impl VBOId {
     fn bind(&self, gl: &gl::Gl) {
         gl.bind_buffer(gl::ARRAY_BUFFER, self.0);
     }
@@ -376,16 +386,51 @@ impl FBOId {
         let target = match target {
             FBOTarget::Read => gl::READ_FRAMEBUFFER,
             FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
         };
         gl.bind_framebuffer(target, self.0);
     }
 }
 
+pub struct Stream<'a> {
+    attributes: &'a [VertexAttribute], // Borrowed attribute layout, as handed to `VBO::stream_with`.
+    vbo: VBOId, // Id of the buffer this stream was created from.
+}
+
+pub struct VBO<V> {
+    id: gl::GLuint, // GL buffer name; the Drop impl debug-asserts it is 0 unless the thread is panicking.
+    target: gl::GLenum,
+    allocated_count: usize, // Value reported by `allocated_count()`.
+    marker: PhantomData<V>, // Zero-sized marker tying the element type `V` to this buffer.
+}
+
+impl<V> VBO<V> {
+    pub fn allocated_count(&self) -> usize { // Plain accessor for the stored count.
+        self.allocated_count
+    }
+
+    pub fn stream_with<'a>(&self, attributes: &'a [VertexAttribute]) -> Stream<'a> { // Pairs this buffer's id with an attribute layout.
+        debug_assert_eq!( // Debug builds only: the attributes' combined byte size must equal size_of::<V>().
+            mem::size_of::<V>(),
+            attributes.iter().map(|a| a.size_in_bytes() as usize).sum::<usize>()
+        );
+        Stream {
+            attributes,
+            vbo: VBOId(self.id), // Only the GL id is copied out; `self` is not consumed.
+        }
+    }
+}
+
+impl<T> Drop for VBO<T> {
+    fn drop(&mut self) {
+        debug_assert!(thread::panicking() || self.id == 0); // Debug builds only: id must be 0 at drop unless already panicking.
+    }
+}
+
 pub struct ExternalTexture {
     id: gl::GLuint,
     target: gl::GLuint,
 }
 
 impl ExternalTexture {
     pub fn new(id: u32, target: TextureTarget) -> ExternalTexture {
         ExternalTexture {
@@ -458,16 +503,29 @@ impl Drop for Program {
     fn drop(&mut self) {
         debug_assert!(
             thread::panicking() || self.id == 0,
             "renderer::deinit not called"
         );
     }
 }
 
+pub struct CustomVAO {
+    id: gl::GLuint, // GL object name; the Drop impl debug-asserts it is 0 unless the thread is panicking.
+}
+
+impl Drop for CustomVAO {
+    fn drop(&mut self) {
+        debug_assert!( // Debug builds only: id must be 0 at drop unless the thread is already panicking.
+            thread::panicking() || self.id == 0,
+            "renderer::deinit not called"
+        );
+    }
+}
+
 pub struct VAO {
     id: gl::GLuint,
     ibo_id: IBOId,
     main_vbo_id: VBOId,
     instance_vbo_id: VBOId,
     instance_stride: usize,
     owns_vertices_and_indices: bool,
 }
@@ -745,77 +803,77 @@ impl Device {
         self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
 
         // Default is sampler 0, always
         self.gl.active_texture(gl::TEXTURE0);
 
         self.frame_id
     }
 
+    fn bind_texture_impl(&mut self, slot: TextureSlot, id: gl::GLuint, target: gl::GLenum) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_textures[slot.0] != id {
+            self.bound_textures[slot.0] = id;
+            self.gl.active_texture(gl::TEXTURE0 + slot.0 as gl::GLuint);
+            self.gl.bind_texture(target, id);
+            self.gl.active_texture(gl::TEXTURE0);
+        }
+    }
+
     pub fn bind_texture<S>(&mut self, sampler: S, texture: &Texture)
     where
         S: Into<TextureSlot>,
     {
-        debug_assert!(self.inside_frame);
-
-        let sampler_index = sampler.into().0;
-        if self.bound_textures[sampler_index] != texture.id {
-            self.bound_textures[sampler_index] = texture.id;
-            self.gl
-                .active_texture(gl::TEXTURE0 + sampler_index as gl::GLuint);
-            self.gl.bind_texture(texture.target, texture.id);
-            self.gl.active_texture(gl::TEXTURE0);
-        }
+        self.bind_texture_impl(sampler.into(), texture.id, texture.target);
     }
 
     pub fn bind_external_texture<S>(&mut self, sampler: S, external_texture: &ExternalTexture)
     where
         S: Into<TextureSlot>,
     {
+        self.bind_texture_impl(sampler.into(), external_texture.id, external_texture.target);
+    }
+
+    fn bind_read_target_impl(&mut self, fbo_id: FBOId) {
         debug_assert!(self.inside_frame);
 
-        let sampler_index = sampler.into().0;
-        if self.bound_textures[sampler_index] != external_texture.id {
-            self.bound_textures[sampler_index] = external_texture.id;
-            self.gl
-                .active_texture(gl::TEXTURE0 + sampler_index as gl::GLuint);
-            self.gl
-                .bind_texture(external_texture.target, external_texture.id);
-            self.gl.active_texture(gl::TEXTURE0);
+        if self.bound_read_fbo != fbo_id {
+            self.bound_read_fbo = fbo_id;
+            fbo_id.bind(self.gl(), FBOTarget::Read);
         }
     }
 
     pub fn bind_read_target(&mut self, texture_and_layer: Option<(&Texture, i32)>) {
-        debug_assert!(self.inside_frame);
-
         let fbo_id = texture_and_layer.map_or(FBOId(self.default_read_fbo), |texture_and_layer| {
             texture_and_layer.0.fbo_ids[texture_and_layer.1 as usize]
         });
 
-        if self.bound_read_fbo != fbo_id {
-            self.bound_read_fbo = fbo_id;
-            fbo_id.bind(self.gl(), FBOTarget::Read);
+        self.bind_read_target_impl(fbo_id)
+    }
+
+    fn bind_draw_target_impl(&mut self, fbo_id: FBOId) {
+        debug_assert!(self.inside_frame);
+
+        if self.bound_draw_fbo != fbo_id {
+            self.bound_draw_fbo = fbo_id;
+            fbo_id.bind(self.gl(), FBOTarget::Draw);
         }
     }
 
     pub fn bind_draw_target(
         &mut self,
         texture_and_layer: Option<(&Texture, i32)>,
         dimensions: Option<DeviceUintSize>,
     ) {
-        debug_assert!(self.inside_frame);
-
         let fbo_id = texture_and_layer.map_or(FBOId(self.default_draw_fbo), |texture_and_layer| {
             texture_and_layer.0.fbo_ids[texture_and_layer.1 as usize]
         });
 
-        if self.bound_draw_fbo != fbo_id {
-            self.bound_draw_fbo = fbo_id;
-            fbo_id.bind(self.gl(), FBOTarget::Draw);
-        }
+        self.bind_draw_target_impl(fbo_id);
 
         if let Some(dimensions) = dimensions {
             self.gl.viewport(
                 0,
                 0,
                 dimensions.width as _,
                 dimensions.height as _,
             );
@@ -884,16 +942,50 @@ impl Device {
             .tex_parameter_i(target, gl::TEXTURE_MIN_FILTER, filter as gl::GLint);
 
         self.gl
             .tex_parameter_i(target, gl::TEXTURE_WRAP_S, gl::CLAMP_TO_EDGE as gl::GLint);
         self.gl
             .tex_parameter_i(target, gl::TEXTURE_WRAP_T, gl::CLAMP_TO_EDGE as gl::GLint);
     }
 
+    /// Resizes a texture with enabled render target views,
+    /// preserves the data by blitting the old texture contents over.
+    pub fn resize_renderable_texture(
+        &mut self,
+        texture: &mut Texture,
+        new_size: DeviceUintSize,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        let old_size = texture.get_dimensions();
+        let old_fbos = mem::replace(&mut texture.fbo_ids, Vec::new());
+        let old_texture_id = mem::replace(&mut texture.id, self.gl.gen_textures(1)[0]);
+
+        texture.width = new_size.width;
+        texture.height = new_size.height;
+        let rt_info = texture.render_target
+            .clone()
+            .expect("Only renderable textures are expected for resize here");
+
+        self.bind_texture(DEFAULT_TEXTURE, texture);
+        self.set_texture_parameters(texture.target, texture.filter);
+        self.update_texture_storage(texture, &rt_info, true);
+
+        let rect = DeviceIntRect::new(DeviceIntPoint::zero(), old_size.to_i32());
+        for (read_fbo, &draw_fbo) in old_fbos.into_iter().zip(&texture.fbo_ids) {
+            self.bind_read_target_impl(read_fbo);
+            self.bind_draw_target_impl(draw_fbo);
+            self.blit_render_target(rect, rect);
+            self.delete_fbo(read_fbo);
+        }
+        self.gl.delete_textures(&[old_texture_id]);
+        self.bind_read_target(None);
+    }
+
     pub fn init_texture(
         &mut self,
         texture: &mut Texture,
         width: u32,
         height: u32,
         format: ImageFormat,
         filter: TextureFilter,
         render_target: Option<RenderTargetInfo>,
@@ -976,38 +1068,55 @@ impl Device {
     /// Updates the texture storage for the texture, creating FBOs as required.
     fn update_texture_storage(
         &mut self,
         texture: &mut Texture,
         rt_info: &RenderTargetInfo,
         is_resized: bool,
     ) {
         assert!(texture.layer_count > 0);
-        assert_eq!(texture.target, gl::TEXTURE_2D_ARRAY);
 
         let needed_layer_count = texture.layer_count - texture.fbo_ids.len() as i32;
         let allocate_color = needed_layer_count != 0 || is_resized;
 
         if allocate_color {
             let (internal_format, gl_format) =
                 gl_texture_formats_for_image_format(&*self.gl, texture.format);
             let type_ = gl_type_for_texture_format(texture.format);
 
-            self.gl.tex_image_3d(
-                texture.target,
-                0,
-                internal_format as gl::GLint,
-                texture.width as gl::GLint,
-                texture.height as gl::GLint,
-                texture.layer_count,
-                0,
-                gl_format,
-                type_,
-                None,
-            );
+            match texture.target {
+                gl::TEXTURE_2D_ARRAY => {
+                    self.gl.tex_image_3d(
+                        texture.target,
+                        0,
+                        internal_format as _,
+                        texture.width as _,
+                        texture.height as _,
+                        texture.layer_count,
+                        0,
+                        gl_format,
+                        type_,
+                        None,
+                    )
+                }
+                _ => {
+                    assert_eq!(texture.layer_count, 1);
+                    self.gl.tex_image_2d(
+                        texture.target,
+                        0,
+                        internal_format as _,
+                        texture.width as _,
+                        texture.height as _,
+                        0,
+                        gl_format,
+                        type_,
+                        None,
+                    )
+                }
+            }
         }
 
         if needed_layer_count > 0 {
             // Create more framebuffers to fill the gap
             let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
             texture
                 .fbo_ids
                 .extend(new_fbos.into_iter().map(FBOId));
@@ -1043,23 +1152,38 @@ impl Device {
                 depth_rb = 0;
                 texture.depth_rb = None;
             }
         }
 
         if allocate_color || allocate_depth {
             for (fbo_index, &fbo_id) in texture.fbo_ids.iter().enumerate() {
                 self.bind_external_draw_target(fbo_id);
-                self.gl.framebuffer_texture_layer(
-                    gl::DRAW_FRAMEBUFFER,
-                    gl::COLOR_ATTACHMENT0,
-                    texture.id,
-                    0,
-                    fbo_index as gl::GLint,
-                );
+                match texture.target {
+                    gl::TEXTURE_2D_ARRAY => {
+                        self.gl.framebuffer_texture_layer(
+                            gl::DRAW_FRAMEBUFFER,
+                            gl::COLOR_ATTACHMENT0,
+                            texture.id,
+                            0,
+                            fbo_index as gl::GLint,
+                        )
+                    }
+                    _ => {
+                        assert_eq!(fbo_index, 0);
+                        self.gl.framebuffer_texture_2d(
+                            gl::DRAW_FRAMEBUFFER,
+                            gl::COLOR_ATTACHMENT0,
+                            texture.target,
+                            texture.id,
+                            0,
+                        )
+                    }
+                }
+
                 self.gl.framebuffer_renderbuffer(
                     gl::DRAW_FRAMEBUFFER,
                     gl::DEPTH_ATTACHMENT,
                     gl::RENDERBUFFER,
                     depth_rb,
                 );
             }
             // restore the previous FBO
@@ -1375,55 +1499,109 @@ impl Device {
         self.gl.read_pixels(
             0, 0,
             width as i32, height as i32,
             gl::RGBA,
             gl::UNSIGNED_BYTE
         )
     }
 
-    pub fn bind_vao(&mut self, vao: &VAO) {
+    fn bind_vao_impl(&mut self, id: gl::GLuint) {
         debug_assert!(self.inside_frame);
 
-        if self.bound_vao != vao.id {
-            self.bound_vao = vao.id;
-            self.gl.bind_vertex_array(vao.id);
+        if self.bound_vao != id {
+            self.bound_vao = id;
+            self.gl.bind_vertex_array(id);
         }
     }
 
+    pub fn bind_vao(&mut self, vao: &VAO) {
+        self.bind_vao_impl(vao.id)
+    }
+
+    pub fn bind_custom_vao(&mut self, vao: &CustomVAO) {
+        self.bind_vao_impl(vao.id)
+    }
+
     fn create_vao_with_vbos(
         &mut self,
         descriptor: &VertexDescriptor,
         main_vbo_id: VBOId,
         instance_vbo_id: VBOId,
         ibo_id: IBOId,
         owns_vertices_and_indices: bool,
     ) -> VAO {
         debug_assert!(self.inside_frame);
 
-        let instance_stride = descriptor.instance_stride();
+        let instance_stride = descriptor.instance_stride() as usize;
         let vao_id = self.gl.gen_vertex_arrays(1)[0];
 
         self.gl.bind_vertex_array(vao_id);
 
         descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id);
         ibo_id.bind(self.gl()); // force it to be a part of VAO
 
-        let vao = VAO {
+        self.gl.bind_vertex_array(0);
+
+        VAO {
             id: vao_id,
             ibo_id,
             main_vbo_id,
             instance_vbo_id,
-            instance_stride: instance_stride as usize,
+            instance_stride,
             owns_vertices_and_indices,
-        };
+        }
+    }
+
+    pub fn create_custom_vao(
+        &mut self,
+        streams: &[Stream],
+    ) -> CustomVAO {
+        debug_assert!(self.inside_frame);
+
+        let vao_id = self.gl.gen_vertex_arrays(1)[0];
+        self.gl.bind_vertex_array(vao_id);
+
+        let mut attrib_index = 0;
+        for stream in streams {
+            VertexDescriptor::bind_attributes(
+                stream.attributes,
+                attrib_index,
+                0,
+                self.gl(),
+                stream.vbo,
+            );
+            attrib_index += stream.attributes.len();
+        }
 
         self.gl.bind_vertex_array(0);
 
-        vao
+        CustomVAO {
+            id: vao_id,
+        }
+    }
+
+    pub fn delete_custom_vao(&mut self, mut vao: CustomVAO) {
+        self.gl.delete_vertex_arrays(&[vao.id]);
+        vao.id = 0;
+    }
+
+    pub fn create_vbo<T>(&mut self) -> VBO<T> {
+        let ids = self.gl.gen_buffers(1);
+        VBO {
+            id: ids[0],
+            target: gl::ARRAY_BUFFER,
+            allocated_count: 0,
+            marker: PhantomData,
+        }
+    }
+
+    pub fn delete_vbo<T>(&mut self, mut vbo: VBO<T>) {
+        self.gl.delete_buffers(&[vbo.id]);
+        vbo.id = 0;
     }
 
     pub fn create_vao(&mut self, descriptor: &VertexDescriptor) -> VAO {
         debug_assert!(self.inside_frame);
 
         let buffer_ids = self.gl.gen_buffers(3);
         let ibo_id = IBOId(buffer_ids[0]);
         let main_vbo_id = VBOId(buffer_ids[1]);
@@ -1439,16 +1617,65 @@ impl Device {
         if vao.owns_vertices_and_indices {
             self.gl.delete_buffers(&[vao.ibo_id.0]);
             self.gl.delete_buffers(&[vao.main_vbo_id.0]);
         }
 
         self.gl.delete_buffers(&[vao.instance_vbo_id.0])
     }
 
+    pub fn allocate_vbo<V>(
+        &mut self,
+        vbo: &mut VBO<V>,
+        count: usize,
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert!(self.inside_frame);
+        vbo.allocated_count = count;
+
+        self.gl.bind_buffer(vbo.target, vbo.id);
+        self.gl.buffer_data_untyped(
+            vbo.target,
+            (count * mem::size_of::<V>()) as _,
+            ptr::null(),
+            usage_hint.to_gl(),
+        );
+    }
+
+    pub fn fill_vbo<V>(
+        &mut self,
+        vbo: &VBO<V>,
+        data: &[V],
+        offset: usize,
+    ) {
+        debug_assert!(self.inside_frame);
+        assert!(offset + data.len() <= vbo.allocated_count);
+        let stride = mem::size_of::<V>();
+
+        self.gl.bind_buffer(vbo.target, vbo.id);
+        self.gl.buffer_sub_data_untyped(
+            vbo.target,
+            (offset * stride) as _,
+            (data.len() * stride) as _,
+            data.as_ptr() as _,
+        );
+    }
+
+    fn update_vbo_data<V>(
+        &mut self,
+        vbo: VBOId,
+        vertices: &[V],
+        usage_hint: VertexUsageHint,
+    ) {
+        debug_assert!(self.inside_frame);
+
+        vbo.bind(self.gl());
+        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, vertices, usage_hint.to_gl());
+    }
+
     pub fn create_vao_with_new_instances(
         &mut self,
         descriptor: &VertexDescriptor,
         base_vao: &VAO,
     ) -> VAO {
         debug_assert!(self.inside_frame);
 
         let buffer_ids = self.gl.gen_buffers(1);
@@ -1464,35 +1691,30 @@ impl Device {
     }
 
     pub fn update_vao_main_vertices<V>(
         &mut self,
         vao: &VAO,
         vertices: &[V],
         usage_hint: VertexUsageHint,
     ) {
-        debug_assert!(self.inside_frame);
         debug_assert_eq!(self.bound_vao, vao.id);
-
-        vao.main_vbo_id.bind(self.gl());
-        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, vertices, usage_hint.to_gl());
+        self.update_vbo_data(vao.main_vbo_id, vertices, usage_hint)
     }
 
     pub fn update_vao_instances<V>(
         &mut self,
         vao: &VAO,
         instances: &[V],
         usage_hint: VertexUsageHint,
     ) {
-        debug_assert!(self.inside_frame);
         debug_assert_eq!(self.bound_vao, vao.id);
         debug_assert_eq!(vao.instance_stride as usize, mem::size_of::<V>());
 
-        vao.instance_vbo_id.bind(self.gl());
-        gl::buffer_data(self.gl(), gl::ARRAY_BUFFER, instances, usage_hint.to_gl());
+        self.update_vbo_data(vao.instance_vbo_id, instances, usage_hint)
     }
 
     pub fn update_vao_indices<I>(&mut self, vao: &VAO, indices: &[I], usage_hint: VertexUsageHint) {
         debug_assert!(self.inside_frame);
         debug_assert_eq!(self.bound_vao, vao.id);
 
         vao.ibo_id.bind(self.gl());
         gl::buffer_data(
@@ -1518,16 +1740,21 @@ impl Device {
         self.gl.draw_elements(
             gl::TRIANGLES,
             index_count,
             gl::UNSIGNED_INT,
             first_vertex as u32 * 4,
         );
     }
 
+    pub fn draw_nonindexed_points(&mut self, first_vertex: i32, vertex_count: i32) {
+        debug_assert!(self.inside_frame);
+        self.gl.draw_arrays(gl::POINTS, first_vertex, vertex_count);
+    }
+
     pub fn draw_nonindexed_lines(&mut self, first_vertex: i32, vertex_count: i32) {
         debug_assert!(self.inside_frame);
         self.gl.draw_arrays(gl::LINES, first_vertex, vertex_count);
     }
 
     pub fn draw_indexed_triangles_instanced_u16(&mut self, index_count: i32, instance_count: i32) {
         debug_assert!(self.inside_frame);
         self.gl.draw_elements_instanced(
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -175,27 +175,29 @@ impl FontInstance {
     pub fn get_subpx_offset(&self, glyph: &GlyphKey) -> (f64, f64) {
         match self.subpx_dir {
             SubpixelDirection::None => (0.0, 0.0),
             SubpixelDirection::Horizontal => (glyph.subpixel_offset.into(), 0.0),
             SubpixelDirection::Vertical => (0.0, glyph.subpixel_offset.into()),
         }
     }
 
-    pub fn get_glyph_format(&self, color_bitmaps: bool) -> GlyphFormat {
+    pub fn get_alpha_glyph_format(&self) -> GlyphFormat {
+        if self.transform.is_identity() { GlyphFormat::Alpha } else { GlyphFormat::TransformedAlpha }
+    }
+
+    pub fn get_subpixel_glyph_format(&self) -> GlyphFormat {
+        if self.transform.is_identity() { GlyphFormat::Subpixel } else { GlyphFormat::TransformedSubpixel }
+    }
+
+    #[allow(dead_code)]
+    pub fn get_glyph_format(&self) -> GlyphFormat {
         match self.render_mode {
-            FontRenderMode::Mono | FontRenderMode::Alpha => {
-                if self.transform.is_identity() { GlyphFormat::Alpha } else { GlyphFormat::TransformedAlpha }
-            }
-            FontRenderMode::Subpixel => {
-                if self.transform.is_identity() { GlyphFormat::Subpixel } else { GlyphFormat::TransformedSubpixel }
-            }
-            FontRenderMode::Bitmap => {
-                if color_bitmaps { GlyphFormat::ColorBitmap } else { GlyphFormat::Alpha }
-            }
+            FontRenderMode::Mono | FontRenderMode::Alpha => self.get_alpha_glyph_format(),
+            FontRenderMode::Subpixel => self.get_subpixel_glyph_format(),
         }
     }
 
     #[allow(dead_code)]
     pub fn get_extra_strikes(&self, x_scale: f64) -> usize {
         if self.flags.contains(FontInstanceFlags::SYNTHETIC_BOLD) {
             let mut bold_offset = self.size.to_f64_px() / 48.0;
             if bold_offset < 1.0 {
@@ -204,21 +206,23 @@ impl FontInstance {
             (bold_offset * x_scale).max(1.0).round() as usize
         } else {
             0
         }
     }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[allow(dead_code)]
 pub enum GlyphFormat {
     Alpha,
     TransformedAlpha,
     Subpixel,
     TransformedSubpixel,
+    Bitmap,
     ColorBitmap,
 }
 
 pub struct RasterizedGlyph {
     pub top: f32,
     pub left: f32,
     pub width: u32,
     pub height: u32,
@@ -448,22 +452,16 @@ impl GlyphRasterizer {
         font: &FontInstance,
         glyph_key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         self.font_contexts
             .lock_shared_context()
             .get_glyph_dimensions(font, glyph_key)
     }
 
-    pub fn is_bitmap_font(&self, font: &FontInstance) -> bool {
-        self.font_contexts
-            .lock_shared_context()
-            .is_bitmap_font(font)
-    }
-
     pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
         self.font_contexts
             .lock_shared_context()
             .get_glyph_index(font_key, ch)
     }
 
     pub fn resolve_glyphs(
         &mut self,
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -17,17 +17,17 @@ use core_graphics::color_space::CGColorS
 use core_graphics::context::{CGContext, CGTextDrawingMode};
 use core_graphics::data_provider::CGDataProvider;
 use core_graphics::font::{CGFont, CGGlyph};
 use core_graphics::geometry::{CGAffineTransform, CGPoint, CGRect, CGSize};
 use core_text;
 use core_text::font::{CTFont, CTFontRef};
 use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
 use gamma_lut::{ColorLut, GammaLut};
-use glyph_rasterizer::{FontInstance, RasterizedGlyph};
+use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
 use std::sync::Arc;
 
 pub struct FontContext {
     cg_fonts: FastHashMap<FontKey, CGFont>,
     ct_fonts: FastHashMap<(FontKey, Au, Vec<FontVariation>), CTFont>,
     gamma_lut: GammaLut,
@@ -254,16 +254,21 @@ fn new_ct_font_with_variations(cg_font: 
             return ct_font;
         }
         let vals_dict = CFDictionary::from_CFType_pairs(&vals);
         let cg_var_font = cg_font.create_copy_from_variations(&vals_dict).unwrap();
         core_text::font::new_from_CGFont(&cg_var_font, size)
     }
 }
 
+fn is_bitmap_font(ct_font: &CTFont) -> bool {
+    let traits = ct_font.symbolic_traits();
+    (traits & kCTFontColorGlyphsTrait) != 0
+}
+
 impl FontContext {
     pub fn new() -> FontContext {
         debug!("Test for subpixel AA support: {}", supports_subpixel_aa());
 
         // Force CG to use sRGB color space to gamma correct.
         let contrast = 0.0;
         let gamma = 0.0;
 
@@ -346,17 +351,18 @@ impl FontContext {
     pub fn get_glyph_dimensions(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         self.get_ct_font(font.font_key, font.size, &font.variations)
             .and_then(|ref ct_font| {
                 let glyph = key.index as CGGlyph;
-                let (x_offset, y_offset) = font.get_subpx_offset(key);
+                let bitmap = is_bitmap_font(ct_font);
+                let (x_offset, y_offset) = if bitmap { (0.0, 0.0) } else { font.get_subpx_offset(key) };
                 let metrics = get_glyph_metrics(ct_font, None, glyph, x_offset, y_offset, 0.0);
                 if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
                     None
                 } else {
                     Some(GlyphDimensions {
                         left: metrics.rasterized_left,
                         top: metrics.rasterized_ascent,
                         width: metrics.rasterized_width as u32,
@@ -399,32 +405,22 @@ impl FontContext {
                 let r = pixel[2];
                 let a = pixel[3];
                 print!("({}, {}, {}, {}) ", r, g, b, a);
             }
             println!("");
         }
     }
 
-    pub fn is_bitmap_font(&mut self, font: &FontInstance) -> bool {
-        match self.get_ct_font(font.font_key, font.size, &font.variations) {
-            Some(ref ct_font) => {
-                let traits = ct_font.symbolic_traits();
-                (traits & kCTFontColorGlyphsTrait) != 0
-            }
-            None => false,
-        }
-    }
-
     pub fn prepare_font(font: &mut FontInstance) {
         match font.render_mode {
-            FontRenderMode::Mono | FontRenderMode::Bitmap => {
-                // In mono/bitmap modes the color of the font is irrelevant.
+            FontRenderMode::Mono => {
+                // In mono mode the color of the font is irrelevant.
                 font.color = ColorU::new(255, 255, 255, 255);
-                // Subpixel positioning is disabled in mono and bitmap modes.
+                // Subpixel positioning is disabled in mono mode.
                 font.subpx_dir = SubpixelDirection::None;
             }
             FontRenderMode::Alpha => {
                 font.color = if font.flags.contains(FontInstanceFlags::FONT_SMOOTHING) {
                     // Only the G channel is used to index grayscale tables,
                     // so use R and B to preserve light/dark determination.
                     let ColorU { g, a, .. } = font.color.luminance_color().quantized_ceil();
                     let rb = if should_use_white_on_black(font.color) { 255 } else { 0 };
@@ -452,39 +448,42 @@ impl FontContext {
     ) -> Option<RasterizedGlyph> {
         let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
         let size = font.size.scale_by(y_scale as f32);
         let ct_font = match self.get_ct_font(font.font_key, size, &font.variations) {
             Some(font) => font,
             None => return None,
         };
 
+        let bitmap = is_bitmap_font(&ct_font);
         let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
-        let transform = if shape.is_identity() {
+        let transform = if bitmap || shape.is_identity() {
             None
         } else {
             Some(CGAffineTransform {
                 a: shape.scale_x as f64,
                 b: -shape.skew_y as f64,
                 c: -shape.skew_x as f64,
                 d: shape.scale_y as f64,
                 tx: 0.0,
                 ty: 0.0
             })
         };
+
         let glyph = key.index as CGGlyph;
-        let (x_offset, y_offset) = font.get_subpx_offset(key);
-        let extra_strikes = font.get_extra_strikes(x_scale);
+        let (x_offset, y_offset) = if bitmap { (0.0, 0.0) } else { font.get_subpx_offset(key) };
+        let (strike_scale, pixel_step) = if bitmap { (y_scale, 1.0) } else { (x_scale, y_scale / x_scale) };
+        let extra_strikes = font.get_extra_strikes(strike_scale);
         let metrics = get_glyph_metrics(
             &ct_font,
             transform.as_ref(),
             glyph,
             x_offset,
             y_offset,
-            extra_strikes as f64 * y_scale / x_scale,
+            extra_strikes as f64 * pixel_step,
         );
         if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
             return None;
         }
 
         // The result of this function, in all render modes, is going to be a
         // BGRA surface with white text on transparency using premultiplied
         // alpha. For subpixel text, the RGB values will be the mask value for
@@ -502,24 +501,20 @@ impl FontContext {
         // we still get four bytes per pixel and CG won't mess with the alpha
         // channel after we've stopped calling CG functions. We just need to
         // make sure that we don't look at the alpha values of the pixels that
         // we get from CG, and compute our own alpha value only from RGB.
         // Note that CG requires kCGBitmapByteOrder32Little in order to do
         // subpixel AA at all (which we need it to do in both Subpixel and
         // Alpha+smoothing mode). But little-endian is what we want anyway, so
         // this works out nicely.
-        let context_flags = match font.render_mode {
-            FontRenderMode::Subpixel | FontRenderMode::Alpha |
-            FontRenderMode::Mono => {
-                kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipFirst
-            }
-            FontRenderMode::Bitmap => {
-                kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedFirst
-            }
+        let context_flags = if bitmap {
+            kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedFirst
+        } else {
+            kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipFirst
         };
 
         let mut cg_context = CGContext::create_bitmap_context(
             None,
             metrics.rasterized_width as usize,
             metrics.rasterized_height as usize,
             8,
             metrics.rasterized_width as usize * 4,
@@ -547,28 +542,30 @@ impl FontContext {
         // uses less font dilation (looks thinner) than dark text.
         // As a consequence, when we ask CG to rasterize with subpixel AA, we
         // will render white-on-black text as opposed to black-on-white text if
         // the text color brightness exceeds a certain threshold. This applies
         // to both the Subpixel and the "Alpha + smoothing" modes, but not to
         // the "Alpha without smoothing" and Mono modes.
         let use_white_on_black = should_use_white_on_black(font.color);
         let use_font_smoothing = font.flags.contains(FontInstanceFlags::FONT_SMOOTHING);
-        let (antialias, smooth, text_color, bg_color, bg_alpha, invert) =
+        let (antialias, smooth, text_color, bg_color, bg_alpha, invert) = if bitmap {
+            (true, false, 0.0, 0.0, 0.0, false)
+        } else {
             match (font.render_mode, use_font_smoothing) {
                 (FontRenderMode::Subpixel, _) |
                 (FontRenderMode::Alpha, true) => if use_white_on_black {
                     (true, true, 1.0, 0.0, 1.0, false)
                 } else {
                     (true, true, 0.0, 1.0, 1.0, true)
                 },
                 (FontRenderMode::Alpha, false) => (true, false, 0.0, 1.0, 1.0, true),
                 (FontRenderMode::Mono, _) => (false, false, 0.0, 1.0, 1.0, true),
-                (FontRenderMode::Bitmap, _) => (true, false, 0.0, 0.0, 0.0, false),
-            };
+            }
+        };
 
         // These are always true in Gecko, even for non-AA fonts
         cg_context.set_allows_font_subpixel_positioning(true);
         cg_context.set_should_subpixel_position_fonts(true);
 
         // Don't quantize because we're doing it already.
         cg_context.set_allows_font_subpixel_quantization(false);
         cg_context.set_should_subpixel_quantize_fonts(false);
@@ -601,29 +598,28 @@ impl FontContext {
         if let Some(transform) = transform {
             cg_context.set_text_matrix(&transform);
 
             draw_origin = draw_origin.apply_transform(&transform.invert());
         }
 
         if extra_strikes > 0 {
             let strikes = 1 + extra_strikes;
-            let pixel_step = y_scale / x_scale;
             let glyphs = vec![glyph; strikes];
             let origins = (0..strikes)
                 .map(|i| CGPoint { x: draw_origin.x + i as f64 * pixel_step, y: draw_origin.y })
                 .collect::<Vec<_>>();
             ct_font.draw_glyphs(&glyphs, &origins, cg_context.clone());
         } else {
             ct_font.draw_glyphs(&[glyph], &[draw_origin], cg_context.clone());
         }
 
         let mut rasterized_pixels = cg_context.data().to_vec();
 
-        if font.render_mode != FontRenderMode::Bitmap {
+        if !bitmap {
             // We rendered text into an opaque surface. The code below needs to
             // ignore the current value of each pixel's alpha channel. But it's
             // allowed to write to the alpha channel, because we're done calling
             // CG functions now.
 
             if smooth {
                 // Convert to linear space for subpixel AA.
                 // We explicitly do not do this for grayscale AA ("Alpha without
@@ -665,14 +661,14 @@ impl FontContext {
             }
         }
 
         Some(RasterizedGlyph {
             left: metrics.rasterized_left as f32,
             top: metrics.rasterized_ascent as f32,
             width: metrics.rasterized_width,
             height: metrics.rasterized_height,
-            scale: 1.0,
-            format: font.get_glyph_format(true),
+            scale: if bitmap { y_scale.recip() as f32 } else { 1.0 },
+            format: if bitmap { GlyphFormat::ColorBitmap } else { font.get_glyph_format() },
             bytes: rasterized_pixels,
         })
     }
 }
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -11,18 +11,18 @@ use freetype::freetype::{FT_Done_FreeTyp
 use freetype::freetype::{FT_F26Dot6, FT_Face, FT_Glyph_Format, FT_Long, FT_UInt};
 use freetype::freetype::{FT_GlyphSlot, FT_LcdFilter, FT_New_Face, FT_New_Memory_Face};
 use freetype::freetype::{FT_Init_FreeType, FT_Load_Glyph, FT_Render_Glyph};
 use freetype::freetype::{FT_Library, FT_Outline_Get_CBox, FT_Set_Char_Size, FT_Select_Size};
 use freetype::freetype::{FT_Fixed, FT_Matrix, FT_Set_Transform};
 use freetype::freetype::{FT_LOAD_COLOR, FT_LOAD_DEFAULT, FT_LOAD_FORCE_AUTOHINT};
 use freetype::freetype::{FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH, FT_LOAD_NO_AUTOHINT};
 use freetype::freetype::{FT_LOAD_NO_BITMAP, FT_LOAD_NO_HINTING, FT_LOAD_VERTICAL_LAYOUT};
-use freetype::freetype::{FT_FACE_FLAG_SCALABLE, FT_FACE_FLAG_FIXED_SIZES, FT_Err_Cannot_Render_Glyph};
-use glyph_rasterizer::{FontInstance, RasterizedGlyph};
+use freetype::freetype::{FT_FACE_FLAG_SCALABLE, FT_FACE_FLAG_FIXED_SIZES};
+use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::{cmp, mem, ptr, slice};
 use std::cmp::max;
 use std::ffi::CString;
 use std::sync::Arc;
 
 // These constants are not present in the freetype
 // bindings due to bindgen not handling the way
@@ -129,17 +129,17 @@ impl FontContext {
                 self.faces.insert(
                     *font_key,
                     Face {
                         face,
                         _bytes: None,
                     },
                 );
             } else {
-                println!("WARN: webrender failed to load font {:?}", font_key);
+                println!("WARN: webrender failed to load font {:?} from path {:?}", font_key, pathname);
             }
         }
     }
 
     pub fn delete_font(&mut self, font_key: &FontKey) {
         if let Some(face) = self.faces.remove(font_key) {
             let result = unsafe { FT_Done_Face(face.face) };
             assert!(result.succeeded());
@@ -180,26 +180,25 @@ impl FontContext {
         }
         if font.flags.contains(FontInstanceFlags::VERTICAL_LAYOUT) {
             load_flags |= FT_LOAD_VERTICAL_LAYOUT;
         }
 
         load_flags |= FT_LOAD_COLOR;
         load_flags |= FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH;
 
+        let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
         let req_size = font.size.to_f64_px();
-        let mut result = if font.render_mode == FontRenderMode::Bitmap {
-            if (load_flags & FT_LOAD_NO_BITMAP) != 0 {
-                FT_Error(FT_Err_Cannot_Render_Glyph as i32)
-            } else {
-                unsafe { FT_Set_Transform(face.face, ptr::null_mut(), ptr::null_mut()) };
-                self.choose_bitmap_size(face.face, req_size)
-            }
+        let face_flags = unsafe { (*face.face).face_flags };
+        let mut result = if (face_flags & (FT_FACE_FLAG_FIXED_SIZES as FT_Long)) != 0 &&
+                            (face_flags & (FT_FACE_FLAG_SCALABLE as FT_Long)) == 0 &&
+                            (load_flags & FT_LOAD_NO_BITMAP) == 0 {
+            unsafe { FT_Set_Transform(face.face, ptr::null_mut(), ptr::null_mut()) };
+            self.choose_bitmap_size(face.face, req_size * y_scale)
         } else {
-            let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
             let shape = font.transform.pre_scale(x_scale.recip() as f32, y_scale.recip() as f32);
             let mut ft_shape = FT_Matrix {
                 xx: (shape.scale_x * 65536.0) as FT_Fixed,
                 xy: (shape.skew_x * -65536.0) as FT_Fixed,
                 yx: (shape.skew_y * -65536.0) as FT_Fixed,
                 yy: (shape.scale_y * 65536.0) as FT_Fixed,
             };
             unsafe {
@@ -268,18 +267,17 @@ impl FontContext {
 
         // Convert the subpixel offset to floats.
         let (dx, dy) = font.get_subpx_offset(glyph);
 
         // Apply extra pixel of padding for subpixel AA, due to the filter.
         let padding = match font.render_mode {
             FontRenderMode::Subpixel => (self.lcd_extra_pixels * 64) as FT_Pos,
             FontRenderMode::Alpha |
-            FontRenderMode::Mono |
-            FontRenderMode::Bitmap => 0 as FT_Pos,
+            FontRenderMode::Mono => 0 as FT_Pos,
         };
 
         // Offset the bounding box by subpixel positioning.
         // Convert to 26.6 fixed point format for FT.
         match font.subpx_dir {
             SubpixelDirection::None => {}
             SubpixelDirection::Horizontal => {
                 let dx = (dx * 64.0 + 0.5) as FT_Long;
@@ -372,27 +370,16 @@ impl FontContext {
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         let slot = self.load_glyph(font, key);
         slot.and_then(|slot| self.get_glyph_dimensions_impl(slot, font, key, true))
     }
 
-    pub fn is_bitmap_font(&mut self, font: &FontInstance) -> bool {
-        debug_assert!(self.faces.contains_key(&font.font_key));
-        let face = self.faces.get(&font.font_key).unwrap();
-        let face_flags = unsafe { (*face.face).face_flags };
-        // If the face has embedded bitmaps, they should only be used if either
-        // embedded bitmaps are explicitly requested or if the face has no outline.
-        (face_flags & (FT_FACE_FLAG_FIXED_SIZES as FT_Long)) != 0 &&
-            (font.flags.contains(FontInstanceFlags::EMBEDDED_BITMAPS) ||
-                (face_flags & (FT_FACE_FLAG_SCALABLE as FT_Long)) == 0)
-    }
-
     fn choose_bitmap_size(&self, face: FT_Face, requested_size: f64) -> FT_Error {
         let mut best_dist = unsafe { *(*face).available_sizes.offset(0) }.y_ppem as f64 / 64.0 - requested_size;
         let mut best_size = 0;
         let num_fixed_sizes = unsafe { (*face).num_fixed_sizes };
         for i in 1 .. num_fixed_sizes {
             // Distance is positive if strike is larger than desired size,
             // or negative if smaller. If previously a found smaller strike,
             // then prefer a larger strike. Otherwise, minimize distance.
@@ -402,20 +389,20 @@ impl FontContext {
                 best_size = i;
             }
         }
         unsafe { FT_Select_Size(face, best_size) }
     }
 
     pub fn prepare_font(font: &mut FontInstance) {
         match font.render_mode {
-            FontRenderMode::Mono | FontRenderMode::Bitmap => {
-                // In mono/bitmap modes the color of the font is irrelevant.
+            FontRenderMode::Mono => {
+                // In mono mode the color of the font is irrelevant.
                 font.color = ColorU::new(0xFF, 0xFF, 0xFF, 0xFF);
-                // Subpixel positioning is disabled in mono and bitmap modes.
+                // Subpixel positioning is disabled in mono mode.
                 font.subpx_dir = SubpixelDirection::None;
             }
             FontRenderMode::Alpha | FontRenderMode::Subpixel => {
                 // We don't do any preblending with FreeType currently, so the color is not used.
                 font.color = ColorU::new(0xFF, 0xFF, 0xFF, 0xFF);
             }
         }
     }
@@ -455,17 +442,17 @@ impl FontContext {
                 FontLCDFilter::Default => FT_LcdFilter::FT_LCD_FILTER_DEFAULT,
                 FontLCDFilter::Light => FT_LcdFilter::FT_LCD_FILTER_LIGHT,
                 FontLCDFilter::Legacy => FT_LcdFilter::FT_LCD_FILTER_LEGACY,
             };
             unsafe { FT_Library_SetLcdFilter(self.lib, filter) };
         }
         let render_mode = match (font.render_mode, font.subpx_dir) {
             (FontRenderMode::Mono, _) => FT_Render_Mode::FT_RENDER_MODE_MONO,
-            (FontRenderMode::Alpha, _) | (FontRenderMode::Bitmap, _) => FT_Render_Mode::FT_RENDER_MODE_NORMAL,
+            (FontRenderMode::Alpha, _) => FT_Render_Mode::FT_RENDER_MODE_NORMAL,
             (FontRenderMode::Subpixel, SubpixelDirection::Vertical) => FT_Render_Mode::FT_RENDER_MODE_LCD_V,
             (FontRenderMode::Subpixel, _) => FT_Render_Mode::FT_RENDER_MODE_LCD,
         };
         let result = unsafe { FT_Render_Glyph(slot, render_mode) };
         if !result.succeeded() {
             error!(
                 "Unable to rasterize {:?} with {:?}, {:?}",
                 key,
@@ -540,17 +527,17 @@ impl FontContext {
             FT_Pixel_Mode::FT_PIXEL_MODE_GRAY |
             FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
                 (bitmap.width as i32, bitmap.rows as i32)
             }
             _ => panic!("Unsupported {:?}", pixel_mode),
         };
         let mut final_buffer = vec![0; (actual_width * actual_height * 4) as usize];
 
-        // Extract the final glyph from FT format into RGBA8 format, which is
+        // Extract the final glyph from FT format into BGRA8 format, which is
         // what WR expects.
         let subpixel_bgr = font.flags.contains(FontInstanceFlags::SUBPIXEL_BGR);
         let mut src_row = bitmap.buffer;
         let mut dest: usize = 0;
         while dest < final_buffer.len() {
             let mut src = src_row;
             let row_end = dest + actual_width as usize * 4;
             match pixel_mode {
@@ -630,23 +617,31 @@ impl FontContext {
                 unsafe {
                     left += (*slot).bitmap_left;
                     top += (*slot).bitmap_top - actual_height;
                 }
             }
             _ => {}
         }
 
+        let glyph_format = match (pixel_mode, format) {
+            (FT_Pixel_Mode::FT_PIXEL_MODE_LCD, _) |
+            (FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V, _) => font.get_subpixel_glyph_format(),
+            (FT_Pixel_Mode::FT_PIXEL_MODE_BGRA, _) => GlyphFormat::ColorBitmap,
+            (_, FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP) => GlyphFormat::Bitmap,
+            _ => font.get_alpha_glyph_format(),
+        };
+
         Some(RasterizedGlyph {
             left: left as f32,
             top: top as f32,
             width: actual_width as u32,
             height: actual_height as u32,
             scale,
-            format: font.get_glyph_format(pixel_mode == FT_Pixel_Mode::FT_PIXEL_MODE_BGRA),
+            format: glyph_format,
             bytes: final_buffer,
         })
     }
 }
 
 impl Drop for FontContext {
     fn drop(&mut self) {
         unsafe {
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstanceFlags, FontKey, FontRenderMode};
 use api::{ColorU, GlyphDimensions, GlyphKey, SubpixelDirection};
 use dwrote;
 use gamma_lut::{ColorLut, GammaLut};
-use glyph_rasterizer::{FontInstance, RasterizedGlyph};
+use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
 use std::sync::Arc;
 
 lazy_static! {
     static ref DEFAULT_FONT_DESCRIPTOR: dwrote::FontDescriptor = dwrote::FontDescriptor {
         family_name: "Arial".to_owned(),
         weight: dwrote::FontWeight::Regular,
@@ -29,61 +29,68 @@ pub struct FontContext {
 
 // DirectWrite is safe to use on multiple threads and non-shareable resources are
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
 fn dwrite_texture_type(render_mode: FontRenderMode) -> dwrote::DWRITE_TEXTURE_TYPE {
     match render_mode {
         FontRenderMode::Mono => dwrote::DWRITE_TEXTURE_ALIASED_1x1,
-        FontRenderMode::Bitmap |
         FontRenderMode::Alpha |
         FontRenderMode::Subpixel => dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1,
     }
 }
 
 fn dwrite_measure_mode(
     font: &FontInstance,
+    bitmaps: bool,
 ) -> dwrote::DWRITE_MEASURING_MODE {
-    if font.flags.contains(FontInstanceFlags::FORCE_GDI) {
+    if bitmaps || font.flags.contains(FontInstanceFlags::FORCE_GDI) {
         dwrote::DWRITE_MEASURING_MODE_GDI_CLASSIC
     } else {
       match font.render_mode {
-          FontRenderMode::Mono | FontRenderMode::Bitmap => dwrote::DWRITE_MEASURING_MODE_GDI_CLASSIC,
+          FontRenderMode::Mono => dwrote::DWRITE_MEASURING_MODE_GDI_CLASSIC,
           FontRenderMode::Alpha | FontRenderMode::Subpixel => dwrote::DWRITE_MEASURING_MODE_NATURAL,
       }
     }
 }
 
 fn dwrite_render_mode(
     font_face: &dwrote::FontFace,
     font: &FontInstance,
     em_size: f32,
     measure_mode: dwrote::DWRITE_MEASURING_MODE,
+    bitmaps: bool,
 ) -> dwrote::DWRITE_RENDERING_MODE {
     let dwrite_render_mode = match font.render_mode {
-        FontRenderMode::Bitmap => dwrote::DWRITE_RENDERING_MODE_GDI_CLASSIC,
         FontRenderMode::Mono => dwrote::DWRITE_RENDERING_MODE_ALIASED,
         FontRenderMode::Alpha | FontRenderMode::Subpixel => {
-            if font.flags.contains(FontInstanceFlags::FORCE_GDI) {
+            if bitmaps || font.flags.contains(FontInstanceFlags::FORCE_GDI) {
                 dwrote::DWRITE_RENDERING_MODE_GDI_CLASSIC
             } else {
                 font_face.get_recommended_rendering_mode_default_params(em_size, 1.0, measure_mode)
             }
         }
     };
 
     if dwrite_render_mode == dwrote::DWRITE_RENDERING_MODE_OUTLINE {
         // Outline mode is not supported
         return dwrote::DWRITE_RENDERING_MODE_CLEARTYPE_NATURAL_SYMMETRIC;
     }
 
     dwrite_render_mode
 }
 
+fn is_bitmap_font(font: &FontInstance) -> bool {
+    // If bitmaps are requested, then treat as a bitmap font to disable transforms.
+    // If mono AA is requested, let that take priority over using bitmaps.
+    font.render_mode != FontRenderMode::Mono &&
+        font.flags.contains(FontInstanceFlags::EMBEDDED_BITMAPS)
+}
+
 impl FontContext {
     pub fn new() -> FontContext {
         // These are the default values we use in Gecko.
         // We use a gamma value of 2.3 for gdi fonts
         // TODO: Fetch this data from Gecko itself.
         let contrast = 1.0;
         let gamma = 1.8;
         let gdi_gamma = 2.3;
@@ -171,218 +178,223 @@ impl FontContext {
             }
         }
     }
 
     fn create_glyph_analysis(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
+        size: f32,
+        transform: Option<dwrote::DWRITE_MATRIX>,
+        bitmaps: bool,
     ) -> dwrote::GlyphRunAnalysis {
         let face = self.get_font_face(font);
         let glyph = key.index as u16;
         let advance = 0.0f32;
         let offset = dwrote::GlyphOffset {
             advanceOffset: 0.0,
             ascenderOffset: 0.0,
         };
 
-        let (.., y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
-        let size = (font.size.to_f64_px() * y_scale) as f32;
-
         let glyph_run = dwrote::DWRITE_GLYPH_RUN {
             fontFace: unsafe { face.as_ptr() },
             fontEmSize: size, // size in DIPs (1/96", same as CSS pixels)
             glyphCount: 1,
             glyphIndices: &glyph,
             glyphAdvances: &advance,
             glyphOffsets: &offset,
             isSideways: 0,
             bidiLevel: 0,
         };
 
-        let dwrite_measure_mode = dwrite_measure_mode(font);
+        let dwrite_measure_mode = dwrite_measure_mode(font, bitmaps);
         let dwrite_render_mode = dwrite_render_mode(
             face,
             font,
             size,
             dwrite_measure_mode,
+            bitmaps,
         );
 
-        let (x_offset, y_offset) = font.get_subpx_offset(key);
-        let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
-        let transform = dwrote::DWRITE_MATRIX {
-            m11: shape.scale_x,
-            m12: shape.skew_y,
-            m21: shape.skew_x,
-            m22: shape.scale_y,
-            dx: x_offset as f32,
-            dy: y_offset as f32,
-        };
-
         dwrote::GlyphRunAnalysis::create(
             &glyph_run,
             1.0,
-            Some(transform),
+            transform,
             dwrite_render_mode,
             dwrite_measure_mode,
             0.0,
             0.0,
         )
     }
 
     pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
         let face = self.fonts.get(&font_key).unwrap();
         let indices = face.get_glyph_indices(&[ch as u32]);
         indices.first().map(|idx| *idx as u32)
     }
 
-    // TODO: Pipe GlyphOptions into glyph_dimensions too
     pub fn get_glyph_dimensions(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
-        // Probably have to default to something else here.
-        let render_mode = FontRenderMode::Subpixel;
-        let analysis = self.create_glyph_analysis(font, key);
+        let size = font.size.to_f32_px();
+        let bitmaps = is_bitmap_font(font);
+        let analysis = self.create_glyph_analysis(font, key, size, None, bitmaps);
 
-        let texture_type = dwrite_texture_type(render_mode);
+        let texture_type = dwrite_texture_type(font.render_mode);
 
         let bounds = analysis.get_alpha_texture_bounds(texture_type);
 
         let width = (bounds.right - bounds.left) as u32;
         let height = (bounds.bottom - bounds.top) as u32;
 
         // Alpha texture bounds can sometimes return an empty rect
         // Such as for spaces
         if width == 0 || height == 0 {
             return None;
         }
 
         let face = self.get_font_face(font);
         face.get_design_glyph_metrics(&[key.index as u16], false)
             .first()
             .map(|metrics| {
-                let em_size = font.size.to_f32_px() / 16.;
+                let em_size = size / 16.;
                 let design_units_per_pixel = face.metrics().designUnitsPerEm as f32 / 16. as f32;
                 let scaled_design_units_to_pixels = em_size / design_units_per_pixel;
                 let advance = metrics.advanceWidth as f32 * scaled_design_units_to_pixels;
 
                 GlyphDimensions {
                     left: bounds.left,
                     top: -bounds.top,
                     width,
                     height,
                     advance: advance,
                 }
             })
     }
 
     // DWrite ClearType gives us values in RGB, but WR expects BGRA.
-    fn convert_to_bgra(&self, pixels: &[u8], render_mode: FontRenderMode) -> Vec<u8> {
-        match render_mode {
-            FontRenderMode::Mono => {
+    fn convert_to_bgra(
+        &self,
+        pixels: &[u8],
+        render_mode: FontRenderMode,
+        bitmaps: bool,
+    ) -> Vec<u8> {
+        match (render_mode, bitmaps) {
+            (FontRenderMode::Mono, _) => {
                 let mut bgra_pixels: Vec<u8> = vec![0; pixels.len() * 4];
                 for i in 0 .. pixels.len() {
                     let alpha = pixels[i];
                     bgra_pixels[i * 4 + 0] = alpha;
                     bgra_pixels[i * 4 + 1] = alpha;
                     bgra_pixels[i * 4 + 2] = alpha;
                     bgra_pixels[i * 4 + 3] = alpha;
                 }
                 bgra_pixels
             }
-            FontRenderMode::Alpha | FontRenderMode::Bitmap => {
+            (FontRenderMode::Alpha, _) | (_, true) => {
                 let length = pixels.len() / 3;
                 let mut bgra_pixels: Vec<u8> = vec![0; length * 4];
                 for i in 0 .. length {
                     // Only take the G channel, as its closest to D2D
                     let alpha = pixels[i * 3 + 1] as u8;
                     bgra_pixels[i * 4 + 0] = alpha;
                     bgra_pixels[i * 4 + 1] = alpha;
                     bgra_pixels[i * 4 + 2] = alpha;
                     bgra_pixels[i * 4 + 3] = alpha;
                 }
                 bgra_pixels
             }
-            FontRenderMode::Subpixel => {
+            (FontRenderMode::Subpixel, false) => {
                 let length = pixels.len() / 3;
                 let mut bgra_pixels: Vec<u8> = vec![0; length * 4];
                 for i in 0 .. length {
                     bgra_pixels[i * 4 + 0] = pixels[i * 3 + 2];
                     bgra_pixels[i * 4 + 1] = pixels[i * 3 + 1];
                     bgra_pixels[i * 4 + 2] = pixels[i * 3 + 0];
                     bgra_pixels[i * 4 + 3] = 0xff;
                 }
                 bgra_pixels
             }
         }
     }
 
-    pub fn is_bitmap_font(&mut self, font: &FontInstance) -> bool {
-        // If bitmaps are requested, then treat as a bitmap font to disable transforms.
-        // If mono AA is requested, let that take priority over using bitmaps.
-        font.render_mode != FontRenderMode::Mono &&
-            font.flags.contains(FontInstanceFlags::EMBEDDED_BITMAPS)
-    }
-
     pub fn prepare_font(font: &mut FontInstance) {
         match font.render_mode {
-            FontRenderMode::Mono | FontRenderMode::Bitmap => {
-                // In mono/bitmap modes the color of the font is irrelevant.
+            FontRenderMode::Mono => {
+                // In mono mode the color of the font is irrelevant.
                 font.color = ColorU::new(255, 255, 255, 255);
-                // Subpixel positioning is disabled in mono and bitmap modes.
+                // Subpixel positioning is disabled in mono mode.
                 font.subpx_dir = SubpixelDirection::None;
             }
             FontRenderMode::Alpha => {
                 font.color = font.color.luminance_color().quantize();
             }
             FontRenderMode::Subpixel => {
                 font.color = font.color.quantize();
             }
         }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
-        let analysis = self.create_glyph_analysis(font, key);
+        let (.., y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+        let size = (font.size.to_f64_px() * y_scale) as f32;
+        let bitmaps = is_bitmap_font(font);
+        let transform = if bitmaps {
+            None
+        } else {
+            let (x_offset, y_offset) = font.get_subpx_offset(key);
+            let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
+            Some(dwrote::DWRITE_MATRIX {
+                m11: shape.scale_x,
+                m12: shape.skew_y,
+                m21: shape.skew_x,
+                m22: shape.scale_y,
+                dx: x_offset as f32,
+                dy: y_offset as f32,
+            })
+        };
+
+        let analysis = self.create_glyph_analysis(font, key, size, transform, bitmaps);
         let texture_type = dwrite_texture_type(font.render_mode);
 
         let bounds = analysis.get_alpha_texture_bounds(texture_type);
         let width = (bounds.right - bounds.left) as u32;
         let height = (bounds.bottom - bounds.top) as u32;
 
         // Alpha texture bounds can sometimes return an empty rect
         // Such as for spaces
         if width == 0 || height == 0 {
             return None;
         }
 
         let pixels = analysis.create_alpha_texture(texture_type, bounds);
-        let mut bgra_pixels = self.convert_to_bgra(&pixels, font.render_mode);
+        let mut bgra_pixels = self.convert_to_bgra(&pixels, font.render_mode, bitmaps);
 
         let lut_correction = match font.render_mode {
-            FontRenderMode::Mono | FontRenderMode::Bitmap => &self.gdi_gamma_lut,
+            FontRenderMode::Mono => &self.gdi_gamma_lut,
             FontRenderMode::Alpha | FontRenderMode::Subpixel => {
-                if font.flags.contains(FontInstanceFlags::FORCE_GDI) {
+                if bitmaps || font.flags.contains(FontInstanceFlags::FORCE_GDI) {
                     &self.gdi_gamma_lut
                 } else {
                     &self.gamma_lut
                 }
             }
         };
         lut_correction.preblend(&mut bgra_pixels, font.color);
 
         Some(RasterizedGlyph {
             left: bounds.left as f32,
             top: -bounds.top as f32,
             width,
             height,
-            scale: 1.0,
-            format: font.get_glyph_format(false),
+            scale: if bitmaps { y_scale.recip() as f32 } else { 1.0 },
+            format: if bitmaps { GlyphFormat::Bitmap } else { font.get_glyph_format() },
             bytes: bgra_pixels,
         })
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -746,18 +746,17 @@ impl TextRunPrimitiveCpu {
     pub fn get_font(
         &self,
         device_pixel_ratio: f32,
         transform: &LayerToWorldTransform,
         rasterization_kind: RasterizationSpace,
     ) -> FontInstance {
         let mut font = self.font.clone();
         font.size = font.size.scale_by(device_pixel_ratio);
-        if font.render_mode != FontRenderMode::Bitmap &&
-           rasterization_kind == RasterizationSpace::Screen {
+        if rasterization_kind == RasterizationSpace::Screen {
             if transform.has_perspective_component() || !transform.has_2d_inverse() {
                 font.render_mode = font.render_mode.limit_by(FontRenderMode::Alpha);
             } else {
                 font.transform = FontTransform::from(transform).quantize();
             }
         }
         font
     }
@@ -816,17 +815,17 @@ impl TextRunPrimitiveCpu {
         request.push(ColorF::from(self.font.color).premultiplied());
         // this is the only case where we need to provide plain color to GPU
         request.extend_from_slice(&[
             GpuBlockData { data: [bg_color.r, bg_color.g, bg_color.b, 1.0] }
         ]);
         request.push([
             self.offset.x,
             self.offset.y,
-            self.font.subpx_dir.limit_by(self.font.render_mode) as u32 as f32,
+            0.0,
             0.0,
         ]);
         request.extend_from_slice(&self.glyph_gpu_blocks);
 
         assert!(request.current_used_block_num() <= MAX_VERTEX_TEXTURE_WIDTH);
     }
 }
 
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -25,19 +25,19 @@ use debug_colors;
 use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
 use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture,
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
 use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
-             VertexUsageHint, VAO};
+             VertexUsageHint, VAO, VBO, CustomVAO};
 use device::ProgramCache;
-use euclid::{rect, ScaleFactor, Transform3D};
+use euclid::{rect, TypedScale, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
 use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::{CacheTextureId, FastHashMap, RenderedDocument, ResultMsg, TextureUpdateOp};
 use internal_types::{DebugOutput, RenderPassIndex, RenderTargetInfo, TextureUpdateList, TextureUpdateSource};
@@ -66,16 +66,20 @@ use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
 use tiling::{RenderPass, RenderPassKind, RenderTargetList};
 use tiling::{BatchKey, BatchKind, BrushBatchKind, BrushImageSourceKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
+/// Enabling this toggle would force the GPU cache scattered texture to
+/// be resized every frame, which enables GPU debuggers to see if this
+/// is performed correctly.
+const GPU_CACHE_RESIZE_TEST: bool = false;
 
 const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
     label: "B_Solid",
     color: debug_colors::RED,
 };
 const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
     label: "B_Mask",
     color: debug_colors::BLACK,
@@ -276,32 +280,34 @@ type ShaderMode = i32;
 enum TextShaderMode {
     Alpha = 0,
     SubpixelConstantTextColor = 1,
     SubpixelPass0 = 2,
     SubpixelPass1 = 3,
     SubpixelWithBgColorPass0 = 4,
     SubpixelWithBgColorPass1 = 5,
     SubpixelWithBgColorPass2 = 6,
-    ColorBitmap = 7,
+    Bitmap = 7,
+    ColorBitmap = 8,
 }
 
 impl Into<ShaderMode> for TextShaderMode {
     fn into(self) -> i32 {
         self as i32
     }
 }
 
 impl From<GlyphFormat> for TextShaderMode {
     fn from(format: GlyphFormat) -> TextShaderMode {
         match format {
             GlyphFormat::Alpha | GlyphFormat::TransformedAlpha => TextShaderMode::Alpha,
             GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
                 panic!("Subpixel glyph formats must be handled separately.");
             }
+            GlyphFormat::Bitmap => TextShaderMode::Bitmap,
             GlyphFormat::ColorBitmap => TextShaderMode::ColorBitmap,
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum TextureSampler {
     Color0,
@@ -436,16 +442,32 @@ const DESC_CLIP: VertexDescriptor = Vert
         VertexAttribute {
             name: "aClipDataResourceAddress",
             count: 4,
             kind: VertexAttributeKind::U16,
         },
     ],
 };
 
+const DESC_GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
+    vertex_attributes: &[
+        VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U16Norm,
+        },
+        VertexAttribute {
+            name: "aValue",
+            count: 4,
+            kind: VertexAttributeKind::F32,
+        },
+    ],
+    instance_attributes: &[],
+};
+
 #[derive(Debug, Copy, Clone)]
 enum VertexArrayKind {
     Primitive,
     Blur,
     Clip,
 }
 
 #[derive(Clone, Debug, PartialEq)]
@@ -754,145 +776,296 @@ struct CacheRow {
 }
 
 impl CacheRow {
     fn new() -> CacheRow {
         CacheRow { is_dirty: false }
     }
 }
 
+/// The bus over which CPU and GPU versions of the cache
+/// get synchronized.
+enum CacheBus {
+    /// PBO-based updates. These currently operate at row granularity
+    /// and are therefore subject to fragmentation issues.
+    PixelBuffer {
+        /// PBO used for transfers.
+        buffer: PBO,
+        /// Meta-data about the cached rows.
+        rows: Vec<CacheRow>,
+        /// Mirrored block data on CPU.
+        cpu_blocks: Vec<GpuBlockData>,
+    },
+    /// Shader-based scattering updates. Currently rendered by a set
+    /// of points into the GPU texture, each carrying a `GpuBlockData`.
+    Scatter {
+        /// Special program to run the scattered update.
+        program: Program,
+        /// VAO containing the source vertex buffers.
+        vao: CustomVAO,
+        /// VBO for positional data, supplied as normalized `u16`.
+        buf_position: VBO<[u16; 2]>,
+        /// VBO for gpu block data.
+        buf_value: VBO<GpuBlockData>,
+        /// Currently stored block count.
+        count: usize,
+    },
+}
+
 /// The device-specific representation of the cache texture in gpu_cache.rs
 struct CacheTexture {
     texture: Texture,
-    pbo: PBO,
-    rows: Vec<CacheRow>,
-    cpu_blocks: Vec<GpuBlockData>,
+    bus: CacheBus,
 }
 
 impl CacheTexture {
-    fn new(device: &mut Device) -> Self {
+    fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
         let texture = device.create_texture(TextureTarget::Default);
-        let pbo = device.create_pbo();
-
-        CacheTexture {
+
+        let bus = if use_scatter {
+            let program = device
+                .create_program("gpu_cache_update", "", &DESC_GPU_CACHE_UPDATE)?;
+            let buf_position = device.create_vbo();
+            let buf_value = device.create_vbo();
+            //Note: the vertex attributes have to be supplied in the same order
+            // as for program creation, but each assigned to a different stream.
+            let vao = device.create_custom_vao(&[
+                buf_position.stream_with(&DESC_GPU_CACHE_UPDATE.vertex_attributes[0..1]),
+                buf_value   .stream_with(&DESC_GPU_CACHE_UPDATE.vertex_attributes[1..2]),
+            ]);
+            CacheBus::Scatter {
+                program,
+                vao,
+                buf_position,
+                buf_value,
+                count: 0,
+            }
+        } else {
+            let buffer = device.create_pbo();
+            CacheBus::PixelBuffer {
+                buffer,
+                rows: Vec::new(),
+                cpu_blocks: Vec::new(),
+            }
+        };
+
+        Ok(CacheTexture {
             texture,
-            pbo,
-            rows: Vec::new(),
-            cpu_blocks: Vec::new(),
-        }
+            bus,
+        })
     }
 
     fn deinit(self, device: &mut Device) {
-        device.delete_pbo(self.pbo);
         device.delete_texture(self.texture);
-    }
-
-    fn apply_patch(&mut self, update: &GpuCacheUpdate, blocks: &[GpuBlockData]) -> usize {
-        match update {
-            &GpuCacheUpdate::Copy {
-                block_index,
-                block_count,
-                address,
-            } => {
-                let row = address.v as usize;
-
-                // Ensure that the CPU-side shadow copy of the GPU cache data has enough
-                // rows to apply this patch.
-                while self.rows.len() <= row {
-                    // Add a new row.
-                    self.rows.push(CacheRow::new());
-                    // Add enough GPU blocks for this row.
-                    self.cpu_blocks
-                        .extend_from_slice(&[GpuBlockData::empty(); MAX_VERTEX_TEXTURE_WIDTH]);
-                }
-
-                // This row is dirty (needs to be updated in GPU texture).
-                self.rows[row].is_dirty = true;
-
-                // Copy the blocks from the patch array in the shadow CPU copy.
-                let block_offset = row * MAX_VERTEX_TEXTURE_WIDTH + address.u as usize;
-                let data = &mut self.cpu_blocks[block_offset .. (block_offset + block_count)];
-                for i in 0 .. block_count {
-                    data[i] = blocks[block_index + i];
-                }
-
-                block_count
+        match self.bus {
+            CacheBus::PixelBuffer { buffer, ..} => {
+                device.delete_pbo(buffer);
+            }
+            CacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
+                device.delete_program(program);
+                device.delete_custom_vao(vao);
+                device.delete_vbo(buf_position);
+                device.delete_vbo(buf_value);
             }
         }
     }
 
-    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) -> usize {
+    fn get_height(&self) -> u32 {
+        self.texture.get_dimensions().height
+    }
+
+    fn prepare_for_updates(
+        &mut self,
+        device: &mut Device,
+        total_block_count: usize,
+        max_height: u32,
+    ) {
         // See if we need to create or resize the texture.
-        let current_dimensions = self.texture.get_dimensions();
-        if updates.height > current_dimensions.height {
-            // Create a f32 texture that can be used for the vertex shader
-            // to fetch data from.
-            device.init_texture(
-                &mut self.texture,
-                MAX_VERTEX_TEXTURE_WIDTH as u32,
-                updates.height as u32,
-                ImageFormat::RGBAF32,
-                TextureFilter::Nearest,
-                None,
-                1,
-                None,
-            );
-
-            // Copy the current texture into the newly resized texture.
-            if current_dimensions.height > 0 {
-                // If we had to resize the texture, just mark all rows
-                // as dirty so they will be uploaded to the texture
-                // during the next flush.
-                for row in &mut self.rows {
-                    row.is_dirty = true;
+        let old_size = self.texture.get_dimensions();
+        let new_size = DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, max_height);
+
+        match self.bus {
+            CacheBus::PixelBuffer { ref mut rows, .. } => {
+                if max_height > old_size.height {
+                    // Create a f32 texture that can be used for the vertex shader
+                    // to fetch data from.
+                    device.init_texture(
+                        &mut self.texture,
+                        new_size.width,
+                        new_size.height,
+                        ImageFormat::RGBAF32,
+                        TextureFilter::Nearest,
+                        None,
+                        1,
+                        None,
+                    );
+
+                    // If we had to resize the texture, just mark all rows
+                    // as dirty so they will be uploaded to the texture
+                    // during the next flush.
+                    for row in rows.iter_mut() {
+                        row.is_dirty = true;
+                    }
+                }
+            }
+            CacheBus::Scatter {
+                ref mut buf_position,
+                ref mut buf_value,
+                ref mut count,
+                ..
+            } => {
+                *count = 0;
+                if total_block_count > buf_value.allocated_count() {
+                    device.allocate_vbo(buf_position, total_block_count, VertexUsageHint::Stream);
+                    device.allocate_vbo(buf_value,    total_block_count, VertexUsageHint::Stream);
+                }
+
+                if new_size.height > old_size.height || GPU_CACHE_RESIZE_TEST {
+                    if old_size.height > 0 {
+                        device.resize_renderable_texture(&mut self.texture, new_size);
+                    } else {
+                        device.init_texture(
+                            &mut self.texture,
+                            new_size.width,
+                            new_size.height,
+                            ImageFormat::RGBAF32,
+                            TextureFilter::Nearest,
+                            Some(RenderTargetInfo {
+                                has_depth: false,
+                            }),
+                            1,
+                            None,
+                        );
+                    }
                 }
             }
         }
-
-        let mut updated_blocks = 0;
-        for update in &updates.updates {
-            updated_blocks += self.apply_patch(update, &updates.blocks);
+    }
+
+    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
+        match self.bus {
+            CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+                for update in &updates.updates {
+                    match update {
+                        &GpuCacheUpdate::Copy {
+                            block_index,
+                            block_count,
+                            address,
+                        } => {
+                            let row = address.v as usize;
+
+                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
+                            // rows to apply this patch.
+                            while rows.len() <= row {
+                                // Add a new row.
+                                rows.push(CacheRow::new());
+                                // Add enough GPU blocks for this row.
+                                cpu_blocks
+                                    .extend_from_slice(&[GpuBlockData::empty(); MAX_VERTEX_TEXTURE_WIDTH]);
+                            }
+
+                            // This row is dirty (needs to be updated in GPU texture).
+                            rows[row].is_dirty = true;
+
+                            // Copy the blocks from the patch array in the shadow CPU copy.
+                            let block_offset = row * MAX_VERTEX_TEXTURE_WIDTH + address.u as usize;
+                            let data = &mut cpu_blocks[block_offset .. (block_offset + block_count)];
+                            for i in 0 .. block_count {
+                                data[i] = updates.blocks[block_index + i];
+                            }
+                        }
+                    }
+                }
+            }
+            CacheBus::Scatter {
+                ref buf_position,
+                ref buf_value,
+                ref mut count,
+                ..
+            } => {
+                //TODO: re-use this heap allocation
+                // Unused positions will be left as 0xFFFF, which translates to
+                // (1.0, 1.0) in the vertex output position and gets culled out
+                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
+                let size = self.texture.get_dimensions().to_usize();
+
+                for update in &updates.updates {
+                    match update {
+                        &GpuCacheUpdate::Copy {
+                            block_index,
+                            block_count,
+                            address,
+                        } => {
+                            // Convert the absolute texel position into normalized `u16` coordinates
+                            let y = ((2*address.v as usize + 1) << 15) / size.height;
+                            for i in 0 .. block_count {
+                                let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
+                                position_data[block_index + i] = [x as _, y as _];
+                            }
+                        }
+                    }
+                }
+
+                device.fill_vbo(buf_value, &updates.blocks, *count);
+                device.fill_vbo(buf_position, &position_data, *count);
+                *count += position_data.len();
+            }
         }
-        updated_blocks
     }
 
     fn flush(&mut self, device: &mut Device) -> usize {
-        let rows_dirty = self.rows
-            .iter()
-            .filter(|row| row.is_dirty)
-            .count();
-        if rows_dirty == 0 {
-            return 0
+        match self.bus {
+            CacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
+                let rows_dirty = rows
+                    .iter()
+                    .filter(|row| row.is_dirty)
+                    .count();
+                if rows_dirty == 0 {
+                    return 0
+                }
+
+                let mut uploader = device.upload_texture(
+                    &self.texture,
+                    buffer,
+                    rows_dirty * MAX_VERTEX_TEXTURE_WIDTH,
+                );
+
+                for (row_index, row) in rows.iter_mut().enumerate() {
+                    if !row.is_dirty {
+                        continue;
+                    }
+
+                    let block_index = row_index * MAX_VERTEX_TEXTURE_WIDTH;
+                    let cpu_blocks =
+                        &cpu_blocks[block_index .. (block_index + MAX_VERTEX_TEXTURE_WIDTH)];
+                    let rect = DeviceUintRect::new(
+                        DeviceUintPoint::new(0, row_index as u32),
+                        DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as u32, 1),
+                    );
+
+                    uploader.upload(rect, 0, None, cpu_blocks);
+
+                    row.is_dirty = false;
+                }
+
+                rows_dirty
+            }
+            CacheBus::Scatter { ref program, ref vao, count, .. } => {
+                device.disable_depth();
+                device.set_blend(false);
+                device.bind_program(program);
+                device.bind_custom_vao(vao);
+                device.bind_draw_target(
+                    Some((&self.texture, 0)),
+                    Some(self.texture.get_dimensions()),
+                );
+                device.draw_nonindexed_points(0, count as _);
+                0
+            }
         }
-
-        let mut uploader = device.upload_texture(
-            &self.texture,
-            &self.pbo,
-            rows_dirty * MAX_VERTEX_TEXTURE_WIDTH,
-        );
-
-        for (row_index, row) in self.rows.iter_mut().enumerate() {
-            if !row.is_dirty {
-                continue;
-            }
-
-            let block_index = row_index * MAX_VERTEX_TEXTURE_WIDTH;
-            let cpu_blocks =
-                &self.cpu_blocks[block_index .. (block_index + MAX_VERTEX_TEXTURE_WIDTH)];
-            let rect = DeviceUintRect::new(
-                DeviceUintPoint::new(0, row_index as u32),
-                DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as u32, 1),
-            );
-
-            uploader.upload(rect, 0, None, cpu_blocks);
-
-            row.is_dirty = false;
-        }
-
-        rows_dirty
     }
 }
 
 struct VertexDataTexture {
     texture: Texture,
     pbo: PBO,
 }
 
@@ -1246,16 +1419,17 @@ impl TextShader {
         transform_kind: TransformedRectKind,
         projection: &Transform3D<f32>,
         mode: M,
         renderer_errors: &mut Vec<RendererError>,
     ) where M: Into<ShaderMode> {
         match glyph_format {
             GlyphFormat::Alpha |
             GlyphFormat::Subpixel |
+            GlyphFormat::Bitmap |
             GlyphFormat::ColorBitmap => {
                 match transform_kind {
                     TransformedRectKind::AxisAligned => {
                         self.simple.bind(device, projection, mode, renderer_errors)
                     }
                     TransformedRectKind::Complex => {
                         self.transform.bind(device, projection, mode, renderer_errors)
                     }
@@ -1957,16 +2131,21 @@ impl Renderer {
 
         let texture_cache_upload_pbo = device.create_pbo();
 
         let texture_resolver = SourceTextureResolver::new(&mut device);
 
         let node_data_texture = VertexDataTexture::new(&mut device);
         let render_task_texture = VertexDataTexture::new(&mut device);
 
+        let gpu_cache_texture = CacheTexture::new(
+            &mut device,
+            options.scatter_gpu_cache_updates,
+        )?;
+
         device.end_frame();
 
         let backend_notifier = notifier.clone();
 
         let default_font_render_mode = match (options.enable_aa, options.enable_subpixel_aa) {
             (true, true) => FontRenderMode::Subpixel,
             (true, false) => FontRenderMode::Alpha,
             (false, _) => FontRenderMode::Mono,
@@ -2033,17 +2212,16 @@ impl Renderer {
                 );
                 backend.run(backend_profile_counters);
                 if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                     thread_listener.thread_stopped(&thread_name);
                 }
             })
         };
 
-        let gpu_cache_texture = CacheTexture::new(&mut device);
         let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
 
         let mut renderer = Renderer {
             result_rx,
             debug_server,
             device,
             active_documents: Vec::new(),
             pending_texture_updates: Vec::new(),
@@ -2507,21 +2685,16 @@ impl Renderer {
 
             self.device.disable_scissor();
             self.device.disable_depth();
             self.device.set_blend(false);
             //self.update_shaders();
 
             self.update_texture_cache();
 
-            self.device.bind_texture(
-                TextureSampler::ResourceCache,
-                &self.gpu_cache_texture.texture,
-            );
-
             frame_id
         });
 
         profile_timers.cpu_time.profile(|| {
             let clear_depth_value = if self.are_documents_intersecting_depth() {
                 None
             } else {
                 Some(1.0)
@@ -2555,17 +2728,17 @@ impl Renderer {
 
             // Re-use whatever targets possible from the pool, before
             // they get changed/re-allocated by the rendered frames.
             for doc_with_id in &mut active_documents {
                 self.prepare_tile_frame(&mut doc_with_id.1.frame);
             }
 
             for &mut (_, RenderedDocument { ref mut frame, .. }) in &mut active_documents {
-                self.update_gpu_cache(frame);
+                self.prepare_gpu_cache(frame);
 
                 self.draw_tile_frame(
                     frame,
                     framebuffer_size,
                     clear_depth_value.is_some(),
                     cpu_frame_id,
                     &mut stats
                 );
@@ -2630,27 +2803,64 @@ impl Renderer {
     }
 
     pub fn layers_are_bouncing_back(&self) -> bool {
         self.active_documents
             .iter()
             .any(|&(_, ref render_doc)| !render_doc.layers_bouncing_back.is_empty())
     }
 
-    fn update_gpu_cache(&mut self, frame: &Frame) {
+    fn prepare_gpu_cache(&mut self, frame: &Frame) {
         let _gm = self.gpu_profile.start_marker("gpu cache update");
-        let mut updated_blocks = 0;
+
+        let deferred_update_list = self.update_deferred_resolves(frame);
+        self.pending_gpu_cache_updates.extend(deferred_update_list);
+
+        // For an artificial stress test of GPU cache resizing,
+        // always pass an extra update list with at least one block in it.
+        let gpu_cache_height = self.gpu_cache_texture.get_height();
+        if gpu_cache_height != 0 &&  GPU_CACHE_RESIZE_TEST {
+            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
+                height: gpu_cache_height,
+                blocks: vec![[1f32; 4].into()],
+                updates: Vec::new(),
+            });
+        }
+
+        let (updated_blocks, max_requested_height) = self
+            .pending_gpu_cache_updates
+            .iter()
+            .fold((0, gpu_cache_height), |(count, height), list| {
+                (count + list.blocks.len(), cmp::max(height, list.height))
+            });
+
+        //Note: if we decide to switch to scatter-style GPU cache updates
+        // permanently, this code could be made nicer with a `BufferUploader`-style
+        // helper, similar to how the `TextureUploader` API is used.
+        self.gpu_cache_texture.prepare_for_updates(
+            &mut self.device,
+            updated_blocks,
+            max_requested_height,
+        );
+
         for update_list in self.pending_gpu_cache_updates.drain(..) {
-            updated_blocks += self.gpu_cache_texture
+            assert!(update_list.height <= max_requested_height);
+            self.gpu_cache_texture
                 .update(&mut self.device, &update_list);
         }
-        self.update_deferred_resolves(frame);
 
         let updated_rows = self.gpu_cache_texture.flush(&mut self.device);
 
+        // Note: the texture might have been re-created or resized in
+        // `prepare_for_updates`, so we need to bind it here.
+        self.device.bind_texture(
+            TextureSampler::ResourceCache,
+            &self.gpu_cache_texture.texture,
+        );
+
         let counters = &mut self.backend_profile_counters.resources.gpu_cache;
         counters.updated_rows.set(updated_rows);
         counters.updated_blocks.set(updated_blocks);
     }
 
     fn update_texture_cache(&mut self) {
         let _gm = self.gpu_profile.start_marker("texture cache update");
         let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
@@ -2951,17 +3161,17 @@ impl Renderer {
             // framebuffer readbacks that are needed for each
             // composite operation in this batch.
             let source = &render_tasks[source_id];
             let backdrop = &render_tasks[task_id];
             let readback = &render_tasks[backdrop_id];
 
             let (readback_rect, readback_layer) = readback.get_target_rect();
             let (backdrop_rect, _) = backdrop.get_target_rect();
-            let content_to_device_scale = ScaleFactor::<_, _, DevicePixel>::new(1i32);
+            let content_to_device_scale = TypedScale::<_, _, DevicePixel>::new(1i32);
             let backdrop_screen_origin = match backdrop.kind {
                 RenderTaskKind::Picture(ref task_info) => task_info
                     .content_origin
                     .to_i32()
                     * content_to_device_scale,
                 _ => panic!("bug: composite on non-picture?"),
             };
             let source_screen_origin = match source.kind {
@@ -3643,83 +3853,89 @@ impl Renderer {
                     stats,
                 );
             }
         }
 
         self.gpu_profile.finish_sampler(alpha_sampler);
     }
 
-    fn update_deferred_resolves(&mut self, frame: &Frame) {
+    fn update_deferred_resolves(&mut self, frame: &Frame) -> Option<GpuCacheUpdateList> {
         // The first thing we do is run through any pending deferred
         // resolves, and use a callback to get the UV rect for this
         // custom item. Then we patch the resource_rects structure
         // here before it's uploaded to the GPU.
-        if !frame.deferred_resolves.is_empty() {
-            let handler = self.external_image_handler
-                .as_mut()
-                .expect("Found external image, but no handler set!");
-
-            for deferred_resolve in &frame.deferred_resolves {
-                self.gpu_profile.place_marker("deferred resolve");
-                let props = &deferred_resolve.image_properties;
-                let ext_image = props
-                    .external_image
-                    .expect("BUG: Deferred resolves must be external images!");
-                let image = handler.lock(ext_image.id, ext_image.channel_index);
-                let texture_target = match ext_image.image_type {
-                    ExternalImageType::Texture2DHandle => TextureTarget::Default,
-                    ExternalImageType::Texture2DArrayHandle => TextureTarget::Array,
-                    ExternalImageType::TextureRectHandle => TextureTarget::Rect,
-                    ExternalImageType::TextureExternalHandle => TextureTarget::External,
-                    ExternalImageType::ExternalBuffer => {
-                        panic!(
-                            "{:?} is not a suitable image type in update_deferred_resolves().",
-                            ext_image.image_type
-                        );
-                    }
-                };
-
-                // In order to produce the handle, the external image handler may call into
-                // the GL context and change some states.
-                self.device.reset_state();
-
-                let texture = match image.source {
-                    ExternalImageSource::NativeTexture(texture_id) => {
-                        ExternalTexture::new(texture_id, texture_target)
-                    }
-                    ExternalImageSource::Invalid => {
-                        warn!(
-                            "Invalid ext-image for ext_id:{:?}, channel:{}.",
-                            ext_image.id,
-                            ext_image.channel_index
-                        );
-                        // Just use 0 as the gl handle for this failed case.
-                        ExternalTexture::new(0, texture_target)
-                    }
-                    _ => panic!("No native texture found."),
-                };
-
-                self.texture_resolver
-                    .external_images
-                    .insert((ext_image.id, ext_image.channel_index), texture);
-
-                let update = GpuCacheUpdate::Copy {
-                    block_index: 0,
-                    block_count: 1,
-                    address: deferred_resolve.address,
-                };
-
-                let blocks = [
-                    [image.u0, image.v0, image.u1, image.v1].into(),
-                    [0.0; 4].into(),
-                ];
-                self.gpu_cache_texture.apply_patch(&update, &blocks);
-            }
+        if frame.deferred_resolves.is_empty() {
+            return None;
         }
+
+        let handler = self.external_image_handler
+            .as_mut()
+            .expect("Found external image, but no handler set!");
+
+        let mut list = GpuCacheUpdateList {
+            height: self.gpu_cache_texture.get_height(),
+            blocks: Vec::new(),
+            updates: Vec::new(),
+        };
+
+        for deferred_resolve in &frame.deferred_resolves {
+            self.gpu_profile.place_marker("deferred resolve");
+            let props = &deferred_resolve.image_properties;
+            let ext_image = props
+                .external_image
+                .expect("BUG: Deferred resolves must be external images!");
+            let image = handler.lock(ext_image.id, ext_image.channel_index);
+            let texture_target = match ext_image.image_type {
+                ExternalImageType::Texture2DHandle => TextureTarget::Default,
+                ExternalImageType::Texture2DArrayHandle => TextureTarget::Array,
+                ExternalImageType::TextureRectHandle => TextureTarget::Rect,
+                ExternalImageType::TextureExternalHandle => TextureTarget::External,
+                ExternalImageType::ExternalBuffer => {
+                    panic!(
+                        "{:?} is not a suitable image type in update_deferred_resolves().",
+                        ext_image.image_type
+                    );
+                }
+            };
+
+            // In order to produce the handle, the external image handler may call into
+            // the GL context and change some states.
+            self.device.reset_state();
+
+            let texture = match image.source {
+                ExternalImageSource::NativeTexture(texture_id) => {
+                    ExternalTexture::new(texture_id, texture_target)
+                }
+                ExternalImageSource::Invalid => {
+                    warn!(
+                        "Invalid ext-image for ext_id:{:?}, channel:{}.",
+                        ext_image.id,
+                        ext_image.channel_index
+                    );
+                    // Just use 0 as the gl handle for this failed case.
+                    ExternalTexture::new(0, texture_target)
+                }
+                _ => panic!("No native texture found."),
+            };
+
+            self.texture_resolver
+                .external_images
+                .insert((ext_image.id, ext_image.channel_index), texture);
+
+            list.updates.push(GpuCacheUpdate::Copy {
+                block_index: list.blocks.len(),
+                block_count: 2,
+                address: deferred_resolve.address,
+            });
+            list.blocks.push([image.u0, image.v0, image.u1, image.v1].into());
+            list.blocks.push([0f32; 4].into());
+        }
+
+        Some(list)
     }
 
     fn unlock_external_images(&mut self) {
         if !self.texture_resolver.external_images.is_empty() {
             let handler = self.external_image_handler
                 .as_mut()
                 .expect("Found external image, but no handler set!");
 
@@ -4142,16 +4358,31 @@ impl Renderer {
     }
 
     pub fn read_pixels_rgba8(&self, rect: DeviceUintRect) -> Vec<u8> {
         let mut pixels = vec![0u8; (4 * rect.size.width * rect.size.height) as usize];
         self.read_pixels_into(rect, ReadPixelsFormat::Rgba8, &mut pixels);
         pixels
     }
 
+    pub fn read_gpu_cache(&mut self) -> (DeviceUintSize, Vec<u8>) {
+        let size = self.gpu_cache_texture.texture.get_dimensions();
+        let mut texels = vec![0u8; 4 * (size.width * size.height) as usize];
+        self.device.begin_frame();
+        self.device.bind_read_target(Some((&self.gpu_cache_texture.texture, 0)));
+        self.read_pixels_into(
+            DeviceUintRect::new(DeviceUintPoint::zero(), size),
+            ReadPixelsFormat::Rgba8,
+            &mut texels,
+        );
+        self.device.bind_read_target(None);
+        self.device.end_frame();
+        (size, texels)
+    }
+
     pub fn read_pixels_into(
         &self,
         rect: DeviceUintRect,
         format: ReadPixelsFormat,
         output: &mut [u8],
     ) {
         let (gl_format, gl_type, size) = match format {
             ReadPixelsFormat::Rgba8 => (gl::RGBA, gl::UNSIGNED_BYTE, 4),
@@ -4294,16 +4525,17 @@ pub struct RendererOptions {
     pub debug: bool,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
     pub renderer_kind: RendererKind,
     pub enable_subpixel_aa: bool,
     pub clear_color: Option<ColorF>,
     pub enable_clear_scissor: bool,
     pub max_texture_size: Option<u32>,
+    pub scatter_gpu_cache_updates: bool,
     pub upload_method: UploadMethod,
     pub workers: Option<Arc<ThreadPool>>,
     pub blob_image_renderer: Option<Box<BlobImageRenderer>>,
     pub recorder: Option<Box<ApiRecordingReceiver>>,
     pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
     pub cached_programs: Option<Rc<ProgramCache>>,
     pub debug_flags: DebugFlags,
@@ -4322,17 +4554,20 @@ impl Default for RendererOptions {
             debug: false,
             enable_scrollbars: false,
             precache_shaders: false,
             renderer_kind: RendererKind::Native,
             enable_subpixel_aa: false,
             clear_color: Some(ColorF::new(1.0, 1.0, 1.0, 1.0)),
             enable_clear_scissor: true,
             max_texture_size: None,
-            //TODO: switch to `Immediate` on Angle
+            // No test has yet shown scattered GPU cache updates to be a win, so they stay off by default.
+            scatter_gpu_cache_updates: false,
+            // This is best as `Immediate` on Angle, or `PixelBuffer(Dynamic)` on GL,
+            // but we are unable to make this decision here, so pick a reasonable middle ground.
             upload_method: UploadMethod::PixelBuffer(VertexUsageHint::Stream),
             workers: None,
             blob_image_renderer: None,
             recorder: None,
             thread_listener: None,
             enable_render_on_scroll: true,
             renderer_id: None,
             cached_programs: None,
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AddFont, BlobImageData, BlobImageResources, ResourceUpdate, ResourceUpdates};
 use api::{BlobImageDescriptor, BlobImageError, BlobImageRenderer, BlobImageRequest};
-use api::{ColorF, FontRenderMode};
+use api::ColorF;
 use api::{DevicePoint, DeviceUintRect, DeviceUintSize};
 use api::{Epoch, FontInstanceKey, FontKey, FontTemplate};
 use api::{ExternalImageData, ExternalImageType};
 use api::{FontInstanceOptions, FontInstancePlatformOptions, FontVariation};
 use api::{GlyphDimensions, GlyphKey, IdNamespace};
 use api::{ImageData, ImageDescriptor, ImageKey, ImageRendering};
 use api::{TileOffset, TileSize};
 use app_units::Au;
@@ -349,31 +349,27 @@ impl ResourceCache {
     ) {
         let FontInstanceOptions {
             render_mode,
             subpx_dir,
             flags,
             bg_color,
             ..
         } = options.unwrap_or_default();
-        assert!(render_mode != FontRenderMode::Bitmap);
-        let mut instance = FontInstance::new(
+        let instance = FontInstance::new(
             font_key,
             glyph_size,
             ColorF::new(0.0, 0.0, 0.0, 1.0),
             bg_color,
             render_mode,
             subpx_dir,
             flags,
             platform_options,
             variations,
         );
-        if self.glyph_rasterizer.is_bitmap_font(&instance) {
-            instance.render_mode = instance.render_mode.limit_by(FontRenderMode::Bitmap);
-        }
         self.resources.font_instances
             .write()
             .unwrap()
             .insert(instance_key, instance);
     }
 
     pub fn delete_font_instance(&mut self, instance_key: FontInstanceKey) {
         self.resources.font_instances
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -1052,17 +1052,17 @@ impl TextureUpdate {
 
                 TextureUpdateSource::Bytes { data: bytes }
             }
         };
 
         let update_op = match dirty_rect {
             Some(dirty) => {
                 let stride = descriptor.compute_stride();
-                let offset = descriptor.offset + dirty.origin.y * stride + dirty.origin.x;
+                let offset = descriptor.offset + dirty.origin.y * stride + dirty.origin.x * descriptor.format.bytes_per_pixel();
                 let origin =
                     DeviceUintPoint::new(origin.x + dirty.origin.x, origin.y + dirty.origin.y);
                 TextureUpdateOp::Update {
                     rect: DeviceUintRect::new(origin, dirty.size),
                     source: data_src,
                     stride: Some(stride),
                     offset,
                     layer_index,
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,18 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, device_length, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
-use api::{DocumentLayer, ExternalImageType, FilterOp, FontRenderMode};
+use api::{DocumentLayer, ExternalImageType, FilterOp};
 use api::{ImageFormat, ImageRendering};
 use api::{LayerRect, MixBlendMode, PipelineId};
-use api::{TileOffset, YuvColorSpace, YuvFormat};
+use api::{SubpixelDirection, TileOffset, YuvColorSpace, YuvFormat};
 use api::{LayerToWorldTransform, WorldPixel};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
 use device::Texture;
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
@@ -101,31 +101,18 @@ impl AlphaBatchHelpers for PrimitiveStor
         &self,
         metadata: &PrimitiveMetadata,
         transform_kind: TransformedRectKind,
     ) -> BlendMode {
         let needs_blending = !metadata.opacity.is_opaque || metadata.clip_task_id.is_some() ||
             transform_kind == TransformedRectKind::Complex;
 
         match metadata.prim_kind {
-            PrimitiveKind::TextRun => {
-                let font = &self.cpu_text_runs[metadata.cpu_prim_index.0].font;
-                match font.render_mode {
-                    FontRenderMode::Subpixel => {
-                        if font.bg_color.a != 0 {
-                            BlendMode::SubpixelWithBgColor
-                        } else {
-                            BlendMode::SubpixelConstantTextColor(font.color.into())
-                        }
-                    }
-                    FontRenderMode::Alpha |
-                    FontRenderMode::Mono |
-                    FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
-                }
-            },
+            // Can only resolve the TextRun's blend mode once glyphs are fetched.
+            PrimitiveKind::TextRun => BlendMode::PremultipliedAlpha,
             PrimitiveKind::Border |
             PrimitiveKind::Image |
             PrimitiveKind::YuvImage |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient |
             PrimitiveKind::Line |
             PrimitiveKind::Brush |
@@ -594,24 +581,44 @@ fn add_to_batch(
                         ],
                     };
 
                     let kind = BatchKind::Transformable(
                         transform_kind,
                         TransformBatchKind::TextRun(glyph_format),
                     );
 
+                    let blend_mode = match glyph_format {
+                        GlyphFormat::Subpixel |
+                        GlyphFormat::TransformedSubpixel => {
+                            if text_cpu.font.bg_color.a != 0 {
+                                BlendMode::SubpixelWithBgColor
+                            } else {
+                                BlendMode::SubpixelConstantTextColor(text_cpu.font.color.into())
+                            }
+                        }
+                        GlyphFormat::Alpha |
+                        GlyphFormat::TransformedAlpha |
+                        GlyphFormat::Bitmap |
+                        GlyphFormat::ColorBitmap => BlendMode::PremultipliedAlpha,
+                    };
+                    let subpx_dir = match glyph_format {
+                        GlyphFormat::Bitmap |
+                        GlyphFormat::ColorBitmap => SubpixelDirection::None,
+                        _ => text_cpu.font.subpx_dir.limit_by(text_cpu.font.render_mode),
+                    };
+
                     let key = BatchKey::new(kind, blend_mode, textures);
                     let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
 
                     for glyph in glyphs {
                         batch.push(base_instance.build(
                             glyph.index_in_text_run,
                             glyph.uv_rect_address.as_int(),
-                            0,
+                            subpx_dir as u32 as i32,
                         ));
                     }
                 },
             );
         }
         PrimitiveKind::Picture => {
             let picture =
                 &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
@@ -1549,26 +1556,32 @@ impl RenderTarget for ColorRenderTarget 
                                                     RasterizationSpace::Local,
                                                 );
 
                                                 ctx.resource_cache.fetch_glyphs(
                                                     font,
                                                     &text.glyph_keys,
                                                     &mut self.glyph_fetch_buffer,
                                                     gpu_cache,
-                                                    |texture_id, _glyph_format, glyphs| {
+                                                    |texture_id, glyph_format, glyphs| {
                                                         let batch = text_run_cache_prims
                                                             .entry(texture_id)
                                                             .or_insert(Vec::new());
 
+                                                        let subpx_dir = match glyph_format {
+                                                            GlyphFormat::Bitmap |
+                                                            GlyphFormat::ColorBitmap => SubpixelDirection::None,
+                                                            _ => text.font.subpx_dir.limit_by(text.font.render_mode),
+                                                        };
+
                                                         for glyph in glyphs {
                                                             batch.push(instance.build(
                                                                 glyph.index_in_text_run,
                                                                 glyph.uv_rect_address.as_int(),
-                                                                0
+                                                                subpx_dir as u32 as i32,
                                                             ));
                                                         }
                                                     },
                                                 );
                                             }
                                             PrimitiveKind::Line => {
                                                 self.line_cache_prims
                                                     .push(instance.build(0, 0, 0));
@@ -2094,17 +2107,17 @@ fn resolve_image(
         Some(image_properties) => {
             // Check if an external image that needs to be resolved
             // by the render thread.
             match image_properties.external_image {
                 Some(external_image) => {
                     // This is an external texture - we will add it to
                     // the deferred resolves list to be patched by
                     // the render thread...
-                    let cache_handle = gpu_cache.push_deferred_per_frame_blocks(1);
+                    let cache_handle = gpu_cache.push_deferred_per_frame_blocks(2);
                     deferred_resolves.push(DeferredResolve {
                         image_properties,
                         address: gpu_cache.get_address(&cache_handle),
                     });
 
                     (SourceTexture::External(external_image), cache_handle)
                 }
                 None => {
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -1,19 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePoint, DeviceRect};
 use api::{DeviceSize, LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, WorldRect};
-use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D, TypedTransform2D};
-use euclid::TypedTransform3D;
+use euclid::{Point2D, Rect, TypedScale, Size2D, TypedPoint2D, TypedRect, TypedSize2D};
+use euclid::{TypedTransform2D, TypedTransform3D};
 use num_traits::Zero;
-use std::i32;
-use std::f32;
+use std::{i32, f32};
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
@@ -138,30 +137,32 @@ pub fn lerp(a: f32, b: f32, t: f32) -> f
     (b - a) * t + a
 }
 
 pub fn calculate_screen_bounding_rect(
     transform: &LayerToWorldTransform,
     rect: &LayerRect,
     device_pixel_ratio: f32
 ) -> DeviceIntRect {
-    let rect = WorldRect::from_points(&[
+    let points = [
         transform.transform_point2d(&rect.origin),
         transform.transform_point2d(&rect.top_right()),
         transform.transform_point2d(&rect.bottom_left()),
         transform.transform_point2d(&rect.bottom_right()),
-    ]) * device_pixel_ratio;
+    ];
 
-    let rect = DeviceRect::new(
-        DevicePoint::new(rect.origin.x, rect.origin.y),
-        DeviceSize::new(rect.size.width, rect.size.height),
-    );
+    let scale = TypedScale::new(device_pixel_ratio);
+    let rect: DeviceRect = WorldRect::from_points(&points) * scale;
 
     let max_rect = DeviceRect::max_rect();
-    rect.round_out().intersection(&max_rect).unwrap_or(max_rect).to_i32()
+    rect
+        .round_out()
+        .intersection(&max_rect)
+        .unwrap_or(max_rect)
+        .to_i32()
 }
 
 pub fn _subtract_rect<U>(
     rect: &TypedRect<f32, U>,
     other: &TypedRect<f32, U>,
     results: &mut Vec<TypedRect<f32, U>>,
 ) {
     results.clear();
@@ -265,26 +266,26 @@ pub fn recycle_vec<T>(mut old_vec: Vec<T
 
     return old_vec;
 }
 
 
 #[cfg(test)]
 pub mod test {
     use super::*;
-    use euclid::{Point2D, Radians, Transform3D};
+    use euclid::{Point2D, Angle, Transform3D};
     use std::f32::consts::PI;
 
     #[test]
     fn inverse_project() {
         let m0 = Transform3D::identity();
         let p0 = Point2D::new(1.0, 2.0);
         // an identical transform doesn't need any inverse projection
         assert_eq!(m0.inverse_project(&p0), Some(p0));
-        let m1 = Transform3D::create_rotation(0.0, 1.0, 0.0, Radians::new(PI / 3.0));
+        let m1 = Transform3D::create_rotation(0.0, 1.0, 0.0, Angle::radians(PI / 3.0));
         // rotation by 60 degrees would imply scaling of X component by a factor of 2
         assert_eq!(m1.inverse_project(&p0), Some(Point2D::new(2.0, 2.0)));
     }
 }
 
 pub trait MaxRect {
     fn max_rect() -> Self;
 }
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -1,27 +1,28 @@
 [package]
 name = "webrender_api"
-version = "0.55.0"
+version = "0.56.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 
 [dependencies]
-app_units = "0.5.6"
+app_units = "0.6"
 bitflags = "1.0"
 bincode = "0.9"
 byteorder = "1.2.1"
-euclid = "0.15"
+euclid = "0.16"
 ipc-channel = {version = "0.9", optional = true}
-serde = { version = "1.0", features = ["rc", "derive"] }
+serde = { version = "=1.0.23", features = ["rc", "derive"] }
+serde_derive = { version = "=1.0.23", features = ["deserialize_from"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4.6"
 core-graphics = "0.12.3"
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4.1"
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -83,16 +83,17 @@ impl LayerPrimitiveInfo {
             tag: None,
         }
     }
 }
 
 pub type LayoutPrimitiveInfo = PrimitiveInfo<LayoutPixel>;
 pub type LayerPrimitiveInfo = PrimitiveInfo<LayerPixel>;
 
+#[repr(u8)]
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum SpecificDisplayItem {
     Clip(ClipDisplayItem),
     ScrollFrame(ScrollFrameDisplayItem),
     StickyFrame(StickyFrameDisplayItem),
     Rectangle(RectangleDisplayItem),
     ClearRectangle,
     Line(LineDisplayItem),
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -220,18 +220,22 @@ impl<'a> BuiltDisplayListIter<'a> {
         self.cur_stops = ItemRange::default();
         self.cur_complex_clip = (ItemRange::default(), 0);
 
         loop {
             if self.data.len() == 0 {
                 return None;
             }
 
-            self.cur_item = bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite)
-                .expect("MEH: malicious process?");
+            {
+                let reader = bincode::read_types::IoReader::new(UnsafeReader::new(&mut self.data));
+                let mut deserializer = bincode::Deserializer::new(reader, bincode::Infinite);
+                self.cur_item.deserialize_from(&mut deserializer)
+                    .expect("MEH: malicious process?");
+            }
 
             match self.cur_item.item {
                 SetGradientStops => {
                     self.cur_stops = skip_slice::<GradientStop>(self.list, &mut self.data).0;
 
                     // This is a dummy item, skip over it
                     continue;
                 }
--- a/gfx/webrender_api/src/font.rs
+++ b/gfx/webrender_api/src/font.rs
@@ -88,17 +88,16 @@ pub enum FontTemplate {
 }
 
 #[repr(u32)]
 #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, Serialize, Deserialize, Ord, PartialOrd)]
 pub enum FontRenderMode {
     Mono = 0,
     Alpha,
     Subpixel,
-    Bitmap,
 }
 
 #[repr(u32)]
 #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug, Deserialize, Serialize, Ord, PartialOrd)]
 pub enum SubpixelDirection {
     None = 0,
     Horizontal,
     Vertical,
@@ -125,28 +124,27 @@ impl FontRenderMode {
             5...6 => SubpixelOffset::ThreeQuarters,
             _ => unreachable!("bug: unexpected quantized result"),
         }
     }
 
     // Combine two font render modes such that the lesser amount of AA limits the AA of the result.
     pub fn limit_by(self, other: FontRenderMode) -> FontRenderMode {
         match (self, other) {
-            (FontRenderMode::Bitmap, _) | (_, FontRenderMode::Bitmap) => FontRenderMode::Bitmap,
             (FontRenderMode::Subpixel, _) | (_, FontRenderMode::Mono) => other,
             _ => self,
         }
     }
 }
 
 impl SubpixelDirection {
     // Limit the subpixel direction to what is supported by the render mode.
     pub fn limit_by(self, render_mode: FontRenderMode) -> SubpixelDirection {
         match render_mode {
-            FontRenderMode::Mono | FontRenderMode::Bitmap => SubpixelDirection::None,
+            FontRenderMode::Mono => SubpixelDirection::None,
             FontRenderMode::Alpha | FontRenderMode::Subpixel => self,
         }
     }
 }
 
 #[repr(u8)]
 #[derive(Hash, Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize)]
 pub enum SubpixelOffset {
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -2,24 +2,24 @@
 name = "webrender_bindings"
 version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
 rayon = "0.8"
 thread_profiler = "0.1.1"
-euclid = "0.15"
-app_units = "0.5.6"
+euclid = "0.16"
+app_units = "0.6"
 gleam = "0.4.15"
 log = "0.3"
 
 [dependencies.webrender]
 path = "../webrender"
-version = "0.55.0"
+version = "0.56.1"
 default-features = false
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4.6"
 core-graphics = "0.12.3"
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -87,17 +87,16 @@ enum class FontLCDFilter : uint8_t {
   Sentinel /* this must be last for serialization purposes. */
 };
 #endif
 
 enum class FontRenderMode : uint32_t {
   Mono = 0,
   Alpha = 1,
   Subpixel = 2,
-  Bitmap = 3,
 
   Sentinel /* this must be last for serialization purposes. */
 };
 
 enum class ImageFormat : uint32_t {
   Invalid = 0,
   A8 = 1,
   RGB8 = 2,