Bug 1479432 - Update webrender to commit 7a1b919e37d6cd0155077aa90f98cfcdf9fa5bae. r?jrmuizel draft
author Kartikaya Gupta <kgupta@mozilla.com>
Thu, 02 Aug 2018 10:20:04 -0400
changeset 825862 0a520d740688b67636a2f311597f25c405528c7f
parent 825858 a2d65d03e46a9a42b5bee5c2a7864d3f987a8ca7
child 825863 54830361fde6933d0b63cc5ffe2b88b9b29c6d7e
push id 118189
push user kgupta@mozilla.com
push date Thu, 02 Aug 2018 14:21:22 +0000
reviewers jrmuizel
bugs 1479432
milestone 63.0a1
Bug 1479432 - Update webrender to commit 7a1b919e37d6cd0155077aa90f98cfcdf9fa5bae. r?jrmuizel MozReview-Commit-ID: 1SJgRWEp2qf
gfx/webrender/Cargo.toml
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_blend.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_mix_blend.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device/query_gl.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/hit_test.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/spatial_node.rs
gfx/webrender/src/util.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_list.rs
gfx/webrender_bindings/Cargo.toml
gfx/webrender_bindings/revision.txt
gfx/wrench/Cargo.toml
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -13,30 +13,30 @@ profiler = ["thread_profiler/thread_prof
 debugger = ["ws", "serde_json", "serde", "image", "base64", "debug_renderer"]
 capture = ["webrender_api/serialize", "ron", "serde", "debug_renderer"]
 replay = ["webrender_api/deserialize", "ron", "serde"]
 debug_renderer = []
 pathfinder = ["pathfinder_font_renderer", "pathfinder_gfx_utils", "pathfinder_partitioner", "pathfinder_path_utils"]
 serialize_program = ["serde"]
 
 [dependencies]
-app_units = "0.6"
+app_units = "0.7"
 base64 = { optional = true, version = "0.6" }
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.0"
 cfg-if = "0.1.2"
-euclid = "0.18"
+euclid = "0.19"
 fxhash = "0.2.1"
 gleam = "0.6"
 image = { optional = true, version = "0.19" }
 lazy_static = "1"
 log = "0.4"
-num-traits = "0.1.43"
-plane-split = "0.10"
+num-traits = "0.2"
+plane-split = "0.12"
 png = { optional = true, version = "0.12" }
 rayon = "1"
 ron = { optional = true, version = "0.1.7" }
 serde = { optional = true, version = "1.0", features = ["serde_derive"] }
 serde_json = { optional = true, version = "1.0" }
 smallvec = "0.6"
 thread_profiler = "0.1.1"
 time = "0.1"
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -65,39 +65,37 @@ void main(void) {
         //           items. For now, just ensure it has no
         //           effect. We can tidy this up as we move
         //           more items to be brush shaders.
 #ifdef WR_FEATURE_ALPHA_PASS
         init_transform_vs(vec4(vec2(-1000000.0), vec2(1000000.0)));
 #endif
     } else {
         bvec4 edge_mask = notEqual(edge_flags & ivec4(1, 2, 4, 8), ivec4(0));
-        bool do_perspective_interpolation = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0;
 
         vi = write_transform_vertex(
             local_segment_rect,
             ph.local_rect,
             ph.local_clip_rect,
             mix(vec4(0.0), vec4(1.0), edge_mask),
             ph.z,
             transform,
-            pic_task,
-            do_perspective_interpolation
+            pic_task
         );
     }
 
     // For brush instances in the alpha pass, always write
     // out clip information.
     // TODO(gw): It's possible that we might want alpha
     //           shaders that don't clip in the future,
     //           but it's reasonable to assume that one
     //           implies the other, for now.
 #ifdef WR_FEATURE_ALPHA_PASS
     write_clip(
-        vi.screen_pos,
+        vi.world_pos,
         clip_area
     );
 #endif
 
     // Run the specific brush VS code to write interpolators.
     brush_vs(
         vi,
         ph.specific_prim_address,
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -24,19 +24,19 @@ void brush_vs(
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
     PictureTask src_task = fetch_picture_task(user_data.x);
     vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
-    vec2 uv = vi.snapped_device_pos +
-              src_task.common_data.task_rect.p0 -
-              src_task.content_origin;
+    vec2 uv = snap_device_pos(vi) +
+        src_task.common_data.task_rect.p0 -
+        src_task.content_origin;
     vUv = vec3(uv / texture_size, src_task.common_data.texture_layer_index);
 
     vec2 uv0 = src_task.common_data.task_rect.p0;
     vec2 uv1 = uv0 + src_task.common_data.task_rect.size;
     vUvClipBounds = vec4(uv0, uv1) / texture_size.xyxy;
 
     float lumR = 0.2126;
     float lumG = 0.7152;
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -6,17 +6,18 @@
 
 #include shared,prim_shared,brush
 
 #ifdef WR_FEATURE_ALPHA_PASS
 varying vec2 vLocalPos;
 #endif
 
 // Interpolated uv coordinates in xy, and layer in z.
-varying vec3 vUv;
+// W is 1 when perspective interpolation is enabled.
+varying vec4 vUv;
 // Normalized bounds of the source image in the texture.
 flat varying vec4 vUvBounds;
 // Normalized bounds of the source image in the texture, adjusted to avoid
 // sampling artifacts.
 flat varying vec4 vUvSampleBounds;
 
 #ifdef WR_FEATURE_ALPHA_PASS
 flat varying vec4 vColor;
@@ -101,16 +102,17 @@ void brush_vs(
             stretch_size.y = (texel_rect.w - texel_rect.y) / uDevicePixelRatio;
         }
 
         uv0 = res.uv_rect.p0 + texel_rect.xy;
         uv1 = res.uv_rect.p0 + texel_rect.zw;
     }
 
     vUv.z = res.layer;
+    vUv.w = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
 
     // Handle case where the UV coords are inverted (e.g. from an
     // external image).
     vec2 min_uv = min(uv0, uv1);
     vec2 max_uv = max(uv0, uv1);
 
     vUvSampleBounds = vec4(
         min_uv + vec2(0.5),
@@ -146,16 +148,20 @@ void brush_vs(
     }
 #endif
 
     // Offset and scale vUv here to avoid doing it in the fragment shader.
     vec2 repeat = local_rect.size / stretch_size;
     vUv.xy = mix(uv0, uv1, f) - min_uv;
     vUv.xy /= texture_size;
     vUv.xy *= repeat.xy;
+    if ((brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) == 0) {
+        // Multiply by W to compensate for perspective interpolation.
+        vUv.xy *= gl_Position.w;
+    }
 
 #ifdef WR_FEATURE_TEXTURE_RECT
     vUvBounds = vec4(0.0, 0.0, vec2(textureSize(sColor0)));
 #else
     vUvBounds = vec4(min_uv, max_uv) / texture_size.xyxy;
 #endif
 
 #ifdef WR_FEATURE_ALPHA_PASS
@@ -191,38 +197,40 @@ void brush_vs(
 #endif
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 Fragment brush_fs() {
     vec2 uv_size = vUvBounds.zw - vUvBounds.xy;
+    // Unapply the W scaler when no perspective interpolation is enabled.
+    vec2 base_uv = vUv.xy * mix(gl_FragCoord.w, 1.0, vUv.w);
 
 #ifdef WR_FEATURE_ALPHA_PASS
     // This prevents the uv on the top and left parts of the primitive that was inflated
     // for anti-aliasing purposes from going beyound the range covered by the regular
     // (non-inflated) primitive.
-    vec2 local_uv = max(vUv.xy, vec2(0.0));
+    vec2 local_uv = max(base_uv, vec2(0.0));
 
     // Handle horizontal and vertical repetitions.
     vec2 repeated_uv = mod(local_uv, uv_size) + vUvBounds.xy;
 
     // This takes care of the bottom and right inflated parts.
     // We do it after the modulo because the latter wraps around the values exactly on
     // the right and bottom edges, which we do not want.
     if (local_uv.x >= vTileRepeat.x * uv_size.x) {
         repeated_uv.x = vUvBounds.z;
     }
     if (local_uv.y >= vTileRepeat.y * uv_size.y) {
         repeated_uv.y = vUvBounds.w;
     }
 #else
     // Handle horizontal and vertical repetitions.
-    vec2 repeated_uv = mod(vUv.xy, uv_size) + vUvBounds.xy;
+    vec2 repeated_uv = mod(base_uv, uv_size) + vUvBounds.xy;
 #endif
 
     // Clamp the uvs to avoid sampling artifacts.
     vec2 uv = clamp(repeated_uv, vUvSampleBounds.xy, vUvSampleBounds.zw);
 
     vec4 texel = TEX_SAMPLE(sColor0, vec3(uv, vUv.z));
 
     Fragment frag;
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -18,27 +18,28 @@ void brush_vs(
     RectWithSize local_rect,
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
+    vec2 snapped_device_pos = snap_device_pos(vi);
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vOp = user_data.x;
 
     PictureTask src_task = fetch_picture_task(user_data.z);
-    vec2 src_uv = vi.snapped_device_pos +
+    vec2 src_uv = snapped_device_pos +
                   src_task.common_data.task_rect.p0 -
                   src_task.content_origin;
     vSrcUv = vec3(src_uv / texture_size, src_task.common_data.texture_layer_index);
 
     RenderTaskCommonData backdrop_task = fetch_render_task_common_data(user_data.y);
-    vec2 backdrop_uv = vi.snapped_device_pos +
+    vec2 backdrop_uv = snapped_device_pos +
                        backdrop_task.task_rect.p0 -
                        src_task.content_origin;
     vBackdropUv = vec3(backdrop_uv / texture_size, backdrop_task.texture_layer_index);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 vec3 Multiply(vec3 Cb, vec3 Cs) {
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -22,17 +22,18 @@ uniform sampler2DArray sCacheRGBA8;
 uniform sampler2DArray sSharedCacheA8;
 
 vec2 clamp_rect(vec2 pt, RectWithSize rect) {
     return clamp(pt, rect.p0, rect.p0 + rect.size);
 }
 
 // TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
 flat varying vec4 vClipMaskUvBounds;
-varying vec3 vClipMaskUv;
+// XY and W are homogeneous coordinates, Z is the layer index
+varying vec4 vClipMaskUv;
 
 
 #ifdef WR_VERTEX_SHADER
 
 #define COLOR_MODE_FROM_PASS          0
 #define COLOR_MODE_ALPHA              1
 #define COLOR_MODE_SUBPX_CONST_COLOR  2
 #define COLOR_MODE_SUBPX_BG_PASS0     3
@@ -81,21 +82,25 @@ PrimitiveHeader fetch_prim_header(int in
     ph.transform_id = data1.x;
     ph.user_data = data1.yzw;
 
     return ph;
 }
 
 struct VertexInfo {
     vec2 local_pos;
-    vec2 screen_pos;
-    float w;
-    vec2 snapped_device_pos;
+    vec2 snap_offset;
+    vec4 world_pos;
 };
 
+// Note: this function is unsafe for `vi.world_pos.w <= 0.0`: the divisor below is then clamped to zero.
+vec2 snap_device_pos(VertexInfo vi) {
+    return vi.world_pos.xy * uDevicePixelRatio / max(0.0, vi.world_pos.w) + vi.snap_offset;
+}
+
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
                         Transform transform,
                         PictureTask task,
                         RectWithSize snap_rect) {
 
     // Select the corner of the local rect that we are processing.
@@ -114,28 +119,26 @@ VertexInfo write_vertex(RectWithSize ins
 
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 snapped_device_pos = device_pos + snap_offset;
-    vec2 final_pos = snapped_device_pos -
+    vec2 final_pos = device_pos + snap_offset -
                      task.content_origin +
                      task.common_data.task_rect.p0;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
-        device_pos,
-        world_pos.w,
-        snapped_device_pos
+        snap_offset,
+        world_pos
     );
 
     return vi;
 }
 
 float cross2(vec2 v0, vec2 v1) {
     return v0.x * v1.y - v0.y * v1.x;
 }
@@ -156,18 +159,17 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
 }
 
 VertexInfo write_transform_vertex(RectWithSize local_segment_rect,
                                   RectWithSize local_prim_rect,
                                   RectWithSize local_clip_rect,
                                   vec4 clip_edge_mask,
                                   float z,
                                   Transform transform,
-                                  PictureTask task,
-                                  bool do_perspective_interpolation) {
+                                  PictureTask task) {
     // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_clip_rect);
     RectWithEndpoint segment_rect = to_rect_with_endpoint(local_segment_rect);
     segment_rect.p0 = clamp(segment_rect.p0, clip_rect.p0, clip_rect.p1);
     segment_rect.p1 = clamp(segment_rect.p1, clip_rect.p0, clip_rect.p1);
 
     // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint prim_rect = to_rect_with_endpoint(local_prim_rect);
@@ -187,73 +189,75 @@ VertexInfo write_transform_vertex(RectWi
     float extrude_amount = 2.0;
     vec4 extrude_distance = vec4(extrude_amount) * clip_edge_mask;
     local_segment_rect.p0 -= extrude_distance.xy;
     local_segment_rect.size += extrude_distance.xy + extrude_distance.zw;
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = local_segment_rect.p0 + local_segment_rect.size * aPosition.xy;
 
-    // Transform the current vertex to the world cpace.
-    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
-
     // Convert the world positions to device pixel space.
-    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
     vec2 task_offset = task.common_data.task_rect.p0 - task.content_origin;
 
-    // Force w = 1, if we don't want perspective interpolation (for
-    // example, drawing a screen-space quad on an element with a
-    // perspective transform).
-    world_pos.w = mix(1.0, world_pos.w, do_perspective_interpolation);
+    // Transform the current vertex to the world space.
+    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
+    vec4 final_pos = vec4(
+        world_pos.xy * uDevicePixelRatio + task_offset * world_pos.w,
+        z * world_pos.w,
+        world_pos.w
+    );
 
-    // We want the world space coords to be perspective divided by W.
-    // We also want that to apply to any interpolators. However, we
-    // want a constant Z across the primitive, since we're using it
-    // for draw ordering - so scale by the W coord to ensure this.
-    vec4 final_pos = vec4(device_pos + task_offset, z, 1.0) * world_pos.w;
     gl_Position = uTransform * final_pos;
 
     init_transform_vs(mix(
         vec4(prim_rect.p0, prim_rect.p1),
         vec4(segment_rect.p0, segment_rect.p1),
         clip_edge_mask
     ));
 
     VertexInfo vi = VertexInfo(
         local_pos,
-        device_pos,
-        world_pos.w,
-        device_pos
+        vec2(0.0),
+        world_pos
     );
 
     return vi;
 }
 
-void write_clip(vec2 global_pos, ClipArea area) {
-    vec2 uv = global_pos +
-              area.common_data.task_rect.p0 -
-              area.screen_origin;
+void write_clip(vec4 world_pos, ClipArea area) {
+    vec2 uv = world_pos.xy * uDevicePixelRatio +
+        world_pos.w * (area.common_data.task_rect.p0 - area.screen_origin);
     vClipMaskUvBounds = vec4(
         area.common_data.task_rect.p0,
         area.common_data.task_rect.p0 + area.common_data.task_rect.size
     );
-    vClipMaskUv = vec3(uv, area.common_data.texture_layer_index);
+    vClipMaskUv = vec4(uv, area.common_data.texture_layer_index, world_pos.w);
 }
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
 float do_clip() {
+    // check for the dummy bounds, which are given to the opaque objects
+    if (vClipMaskUvBounds.xy == vClipMaskUvBounds.zw) {
+        return 1.0;
+    }
     // anything outside of the mask is considered transparent
+    //Note: we assume gl_FragCoord.w == interpolated(1 / vClipMaskUv.w)
+    vec2 mask_uv = vClipMaskUv.xy * gl_FragCoord.w;
     bvec4 inside = lessThanEqual(
-        vec4(vClipMaskUvBounds.xy, vClipMaskUv.xy),
-        vec4(vClipMaskUv.xy, vClipMaskUvBounds.zw));
-    // check for the dummy bounds, which are given to the opaque objects
-    return vClipMaskUvBounds.xy == vClipMaskUvBounds.zw ? 1.0:
-        all(inside) ? texelFetch(sCacheA8, ivec3(vClipMaskUv), 0).r : 0.0;
+        vec4(vClipMaskUvBounds.xy, mask_uv),
+        vec4(mask_uv, vClipMaskUvBounds.zw));
+    // bail out if the pixel is outside the valid bounds
+    if (!all(inside)) {
+        return 0.0;
+    }
+    // finally, the slow path - fetch the mask value from an image
+    ivec3 tc = ivec3(mask_uv, vClipMaskUv.z);
+    return texelFetch(sCacheA8, tc, 0).r;
 }
 
 #ifdef WR_FEATURE_DITHERING
 vec4 dither(vec4 color) {
     const int matrix_mask = 7;
 
     ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
     float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0;
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -68,62 +68,62 @@ VertexInfo write_text_vertex(vec2 clampe
                              RectWithSize snap_rect,
                              vec2 snap_bias) {
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     float device_scale = uDevicePixelRatio / world_pos.w;
     vec2 device_pos = world_pos.xy * device_scale;
-
-    // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos -
-                     task.content_origin +
-                     task.common_data.task_rect.p0;
+    vec2 snap_offset = vec2(0.0);
 
 #if defined(WR_FEATURE_GLYPH_TRANSFORM)
     bool remove_subpx_offset = true;
 #else
-    // Compute the snapping offset only if the scroll node transform is axis-aligned.
     bool remove_subpx_offset = transform.is_axis_aligned;
 #endif
+    // Compute the snapping offset only if the scroll node transform is axis-aligned.
     if (remove_subpx_offset) {
         // Ensure the transformed text offset does not contain a subpixel translation
         // such that glyph snapping is stable for equivalent glyph subpixel positions.
         vec2 world_text_offset = mat2(transform.m) * text_offset;
         vec2 device_text_pos = (transform.m[3].xy + world_text_offset) * device_scale;
-        final_pos += floor(device_text_pos + 0.5) - device_text_pos;
+        snap_offset += floor(device_text_pos + 0.5) - device_text_pos;
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
         // For transformed subpixels, we just need to align the glyph origin to a device pixel.
         // The transformed text offset has already been snapped, so remove it from the glyph
         // origin when snapping the glyph.
-        vec2 snap_offset = snap_rect.p0 - world_text_offset * device_scale;
-        final_pos += floor(snap_offset + snap_bias) - snap_offset;
+        vec2 rough_offset = snap_rect.p0 - world_text_offset * device_scale;
+        snap_offset += floor(rough_offset + snap_bias) - rough_offset;
 #else
         // The transformed text offset has already been snapped, so remove it from the transform
         // when snapping the glyph.
         mat4 snap_transform = transform.m;
         snap_transform[3].xy = -world_text_offset;
-        final_pos += compute_snap_offset(
+        snap_offset += compute_snap_offset(
             clamped_local_pos,
             snap_transform,
             snap_rect,
             snap_bias
         );
 #endif
     }
 
+    // Apply offsets for the render task to get correct screen location.
+    vec2 final_pos = device_pos + snap_offset -
+                     task.content_origin +
+                     task.common_data.task_rect.p0;
+
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
-        device_pos,
-        world_pos.w,
-        final_pos
+        snap_offset,
+        world_pos
     );
 
     return vi;
 }
 
 void main(void) {
     int prim_header_address = aData.x;
     int glyph_index = aData.y;
@@ -218,17 +218,17 @@ void main(void) {
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     vec2 f = (glyph_transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
     vUvClip = vec4(f, 1.0 - f);
 #else
     vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
 #endif
 
-    write_clip(vi.screen_pos, clip_area);
+    write_clip(vi.world_pos, clip_area);
 
     switch (color_mode) {
         case COLOR_MODE_ALPHA:
         case COLOR_MODE_BITMAP:
             vMaskSwizzle = vec2(0.0, 1.0);
             vColor = text.color;
             break;
         case COLOR_MODE_SUBPX_BG_PASS2:
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -1783,17 +1783,17 @@ impl ClipBatcher {
         coordinate_system_id: CoordinateSystemId,
         resource_cache: &ResourceCache,
         gpu_cache: &GpuCache,
         clip_store: &ClipStore,
         transforms: &TransformPalette,
     ) {
         let mut coordinate_system_id = coordinate_system_id;
         for work_item in clips.iter() {
-            let info = clip_store.get(work_item.clip_sources_index);
+            let info = &clip_store[work_item.clip_sources_index];
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
                 transform_id: transforms.get_id(info.spatial_node_index),
                 segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
             };
 
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -4,20 +4,30 @@
 
 use api::{BorderRadius, BorderSide, BorderStyle, BorderWidths, ColorF};
 use api::{ColorU, DeviceRect, DeviceSize, LayoutSizeAu, LayoutPrimitiveInfo, LayoutToDeviceScale};
 use api::{DevicePixel, DeviceVector2D, DevicePoint, DeviceIntSize, LayoutRect, LayoutSize, NormalBorder};
 use app_units::Au;
 use ellipse::Ellipse;
 use display_list_flattener::DisplayListFlattener;
 use gpu_types::{BorderInstance, BorderSegment, BrushFlags};
-use prim_store::{BrushKind, BrushPrimitive, BrushSegment};
+use prim_store::{BrushKind, BrushPrimitive, BrushSegment, VECS_PER_SEGMENT};
 use prim_store::{BorderSource, EdgeAaSegmentMask, PrimitiveContainer, ScrollNodeAndClipChain};
+use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
 use util::{lerp, RectHelpers};
 
+// Using 2048 as the maximum radius in device space before which we
+// start stretching is up for debate.
+// The value must be chosen so that the corners will not use an
+// unreasonable amount of memory but should allow crisp corners in the
+// common cases.
+
+/// Maximum resolution in device pixels at which borders are rasterized.
+pub const MAX_BORDER_RESOLUTION: u32 = 2048;
+
 trait AuSizeConverter {
     fn to_au(&self) -> LayoutSizeAu;
 }
 
 impl AuSizeConverter for LayoutSize {
     fn to_au(&self) -> LayoutSizeAu {
         LayoutSizeAu::new(
             Au::from_f32_px(self.width),
@@ -340,28 +350,34 @@ impl BorderCornerClipSource {
             outer_scale.x * self.radius.width,
             outer_scale.y * self.radius.height,
         );
         let clip_sign = DeviceVector2D::new(
             1.0 - 2.0 * outer_scale.x,
             1.0 - 2.0 * outer_scale.y,
         );
 
+        // No point in pushing more clips as it will blow up the maximum amount of
+        // segments per primitive later down the road.
+        // See #2915 for a better fix.
+        let clip_limit = MAX_VERTEX_TEXTURE_WIDTH / VECS_PER_SEGMENT;
+        let max_clip_count = self.max_clip_count.min(clip_limit);
+
         match self.kind {
             BorderCornerClipKind::Dash => {
                 // Get the correct dash arc length.
                 let dash_arc_length =
-                    0.5 * self.ellipse.total_arc_length / self.max_clip_count as f32;
+                    0.5 * self.ellipse.total_arc_length / max_clip_count as f32;
                 // Start the first dash at one quarter the length of a single dash
                 // along the arc line. This is arbitrary but looks reasonable in
                 // most cases. We need to spend some time working on a more
                 // sophisticated dash placement algorithm that takes into account
                 // the offset of the dashes along edge segments.
                 let mut current_arc_length = 0.25 * dash_arc_length;
-                for _ in 0 .. self.max_clip_count {
+                for _ in 0 .. max_clip_count {
                     let arc_length0 = current_arc_length;
                     current_arc_length += dash_arc_length;
 
                     let arc_length1 = current_arc_length;
                     current_arc_length += dash_arc_length;
 
                     let alpha = self.ellipse.find_angle_for_arc_length(arc_length0);
                     let beta =  self.ellipse.find_angle_for_arc_length(arc_length1);
@@ -396,17 +412,17 @@ impl BorderCornerClipSource {
                         tangent0.y,
                         point1.x,
                         point1.y,
                         tangent1.x,
                         tangent1.y,
                     ]);
                 }
             }
-            BorderCornerClipKind::Dot if self.max_clip_count == 1 => {
+            BorderCornerClipKind::Dot if max_clip_count == 1 => {
                 let dot_diameter = lerp(self.widths.width, self.widths.height, 0.5);
                 dot_dash_data.push([
                     self.widths.width / 2.0, self.widths.height / 2.0, 0.5 * dot_diameter, 0.,
                     0., 0., 0., 0.,
                 ]);
             }
             BorderCornerClipKind::Dot => {
                 let mut forward_dots = Vec::new();
@@ -417,17 +433,17 @@ impl BorderCornerClipSource {
                 // ellipse arc. This ensures that we always end up with an exact
                 // half dot at each end of the arc, to match up with the edges.
                 forward_dots.push(DotInfo::new(self.widths.width, self.widths.width));
                 back_dots.push(DotInfo::new(
                     self.ellipse.total_arc_length - self.widths.height,
                     self.widths.height,
                 ));
 
-                for dot_index in 0 .. self.max_clip_count {
+                for dot_index in 0 .. max_clip_count {
                     let prev_forward_pos = *forward_dots.last().unwrap();
                     let prev_back_pos = *back_dots.last().unwrap();
 
                     // Select which end of the arc to place a dot from.
                     // This just alternates between the start and end of
                     // the arc, which ensures that there is always an
                     // exact half-dot at each end of the ellipse.
                     let going_forward = dot_index & 1 == 0;
@@ -922,16 +938,33 @@ impl BorderRenderTaskInfo {
                 &mut instances,
                 info.widths,
                 info.radius,
             );
         }
 
         instances
     }
+
+    /// Computes the maximum scale that we allow for this set of border radii.
+    /// Capping the scale will result in rendering very large corners at a lower
+    /// resolution and stretching them, so they will have the right shape, but
+    /// blurrier.
+    pub fn get_max_scale(radii: &BorderRadius) -> LayoutToDeviceScale {
+        let r = radii.top_left.width
+            .max(radii.top_left.height)
+            .max(radii.top_right.width)
+            .max(radii.top_right.height)
+            .max(radii.bottom_left.width)
+            .max(radii.bottom_left.height)
+            .max(radii.bottom_right.width)
+            .max(radii.bottom_right.height);
+
+        LayoutToDeviceScale::new(MAX_BORDER_RESOLUTION as f32 / r)
+    }
 }
 
 fn add_brush_segment(
     image_rect: LayoutRect,
     task_rect: DeviceRect,
     brush_flags: BrushFlags,
     edge_flags: EdgeAaSegmentMask,
     brush_segments: &mut Vec<BrushSegment>,
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -9,110 +9,142 @@ use border::{ensure_no_corner_overlap};
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId, SpatialNodeIndex};
 use ellipse::Ellipse;
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use gpu_types::BoxShadowStretchMode;
 use prim_store::{ClipData, ImageMaskData};
 use render_task::to_cache_size;
 use resource_cache::{ImageRequest, ResourceCache};
-use util::{LayoutToWorldFastTransform, MaxRect, calculate_screen_bounding_rect};
-use util::{extract_inner_rect_safe, pack_as_float, recycle_vec};
+use util::{LayoutToWorldFastTransform, MaxRect, TransformedRectKind};
+use util::{calculate_screen_bounding_rect, extract_inner_rect_safe, pack_as_float, recycle_vec};
+use std::{iter, ops};
 use std::sync::Arc;
 
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ClipSourcesIndex(usize);
 
 pub struct ClipStore {
     clip_sources: Vec<ClipSources>,
 }
 
 impl ClipStore {
-    pub fn new() -> ClipStore {
+    pub fn new() -> Self {
         ClipStore {
             clip_sources: Vec::new(),
         }
     }
 
-    pub fn recycle(self) -> ClipStore {
+    pub fn recycle(self) -> Self {
         ClipStore {
             clip_sources: recycle_vec(self.clip_sources),
         }
     }
 
     pub fn insert(&mut self, clip_sources: ClipSources) -> ClipSourcesIndex {
         let index = ClipSourcesIndex(self.clip_sources.len());
         self.clip_sources.push(clip_sources);
         index
     }
+}
 
-    pub fn get(&self, index: ClipSourcesIndex) -> &ClipSources {
+impl ops::Index<ClipSourcesIndex> for ClipStore {
+    type Output = ClipSources;
+    fn index(&self, index: ClipSourcesIndex) -> &Self::Output {
         &self.clip_sources[index.0]
     }
+}
 
-    pub fn get_mut(&mut self, index: ClipSourcesIndex) -> &mut ClipSources {
+impl ops::IndexMut<ClipSourcesIndex> for ClipStore {
+    fn index_mut(&mut self, index: ClipSourcesIndex) -> &mut Self::Output {
         &mut self.clip_sources[index.0]
     }
 }
 
 #[derive(Debug)]
 pub struct LineDecorationClipSource {
     rect: LayoutRect,
     style: LineStyle,
     orientation: LineOrientation,
     wavy_line_thickness: f32,
 }
 
-#[derive(Clone, Debug)]
-pub struct ClipRegion {
-    pub main: LayoutRect,
-    pub image_mask: Option<ImageMask>,
-    pub complex_clips: Vec<ComplexClipRegion>,
+
+pub struct ComplexTranslateIter<I> {
+    source: I,
+    offset: LayoutVector2D,
+}
+
+impl<I: Iterator<Item = ComplexClipRegion>> Iterator for ComplexTranslateIter<I> {
+    type Item = ComplexClipRegion;
+    fn next(&mut self) -> Option<Self::Item> {
+        self.source
+            .next()
+            .map(|mut complex| {
+                complex.rect = complex.rect.translate(&self.offset);
+                complex
+            })
+    }
 }
 
-impl ClipRegion {
+#[derive(Clone, Debug)]
+pub struct ClipRegion<I> {
+    pub main: LayoutRect,
+    pub image_mask: Option<ImageMask>,
+    pub complex_clips: I,
+}
+
+impl<J> ClipRegion<ComplexTranslateIter<J>> {
     pub fn create_for_clip_node(
         rect: LayoutRect,
-        mut complex_clips: Vec<ComplexClipRegion>,
+        complex_clips: J,
         mut image_mask: Option<ImageMask>,
         reference_frame_relative_offset: &LayoutVector2D,
-    ) -> ClipRegion {
-        let rect = rect.translate(reference_frame_relative_offset);
-
+    ) -> Self
+    where
+        J: Iterator<Item = ComplexClipRegion>
+    {
         if let Some(ref mut image_mask) = image_mask {
             image_mask.rect = image_mask.rect.translate(reference_frame_relative_offset);
         }
 
-        for complex_clip in complex_clips.iter_mut() {
-            complex_clip.rect = complex_clip.rect.translate(reference_frame_relative_offset);
-        }
-
         ClipRegion {
-            main: rect,
+            main: rect.translate(reference_frame_relative_offset),
             image_mask,
-            complex_clips,
+            complex_clips: ComplexTranslateIter {
+                source: complex_clips,
+                offset: *reference_frame_relative_offset,
+            },
         }
     }
+}
 
+impl ClipRegion<Option<ComplexClipRegion>> {
     pub fn create_for_clip_node_with_local_clip(
         local_clip: &LocalClip,
         reference_frame_relative_offset: &LayoutVector2D
-    ) -> ClipRegion {
-        let complex_clips = match *local_clip {
-            LocalClip::Rect(_) => Vec::new(),
-            LocalClip::RoundedRect(_, ref region) => vec![region.clone()],
-        };
-        ClipRegion::create_for_clip_node(
-            *local_clip.clip_rect(),
-            complex_clips,
-            None,
-            reference_frame_relative_offset
-        )
+    ) -> Self {
+        ClipRegion {
+            main: local_clip
+                .clip_rect()
+                .translate(reference_frame_relative_offset),
+            image_mask: None,
+            complex_clips: match *local_clip {
+                LocalClip::Rect(_) => None,
+                LocalClip::RoundedRect(_, ref region) => {
+                    Some(ComplexClipRegion {
+                        rect: region.rect.translate(reference_frame_relative_offset),
+                        radii: region.radii,
+                        mode: region.mode,
+                    })
+                },
+            }
+        }
     }
 }
 
 #[derive(Debug)]
 pub enum ClipSource {
     Rectangle(LayoutRect, ClipMode),
     RoundedRectangle(LayoutRect, BorderRadius, ClipMode),
     Image(ImageMask),
@@ -275,151 +307,167 @@ impl ClipSource {
     pub fn is_image_or_line_decoration_clip(&self) -> bool {
         match *self {
             ClipSource::Image(..) | ClipSource::LineDecoration(..) => true,
             _ => false,
         }
     }
 }
 
+
+struct BoundsAccumulator {
+    local_outer: Option<LayoutRect>,
+    local_inner: Option<LayoutRect>,
+    can_calculate_inner_rect: bool,
+    can_calculate_outer_rect: bool,
+}
+
+impl BoundsAccumulator {
+    fn new() -> Self {
+        BoundsAccumulator {
+            local_outer: Some(LayoutRect::max_rect()),
+            local_inner: Some(LayoutRect::max_rect()),
+            can_calculate_inner_rect: true,
+            can_calculate_outer_rect: false,
+        }
+    }
+
+    fn add(&mut self, source: &ClipSource) {
+        // Depending on the complexity of the clip, we may either know the outer and/or inner
+        // rect, or neither of these.  In the case of a clip-out, we currently set the mask bounds
+        // to be unknown. This is conservative, but ensures correctness. In the future we can make
+        // this a lot more clever with some proper region handling.
+        if !self.can_calculate_inner_rect {
+            return
+        }
+
+        match *source {
+            ClipSource::Image(ref mask) => {
+                if !mask.repeat {
+                    self.can_calculate_outer_rect = true;
+                    self.local_outer = self.local_outer.and_then(|r| r.intersection(&mask.rect));
+                }
+                self.local_inner = None;
+            }
+            ClipSource::Rectangle(rect, mode) => {
+                // Once we encounter a clip-out, we just assume the worst
+                // case clip mask size, for now.
+                if mode == ClipMode::ClipOut {
+                    self.can_calculate_inner_rect = false;
+                    return
+                }
+
+                self.can_calculate_outer_rect = true;
+                self.local_outer = self.local_outer.and_then(|r| r.intersection(&rect));
+                self.local_inner = self.local_inner.and_then(|r| r.intersection(&rect));
+            }
+            ClipSource::RoundedRectangle(ref rect, ref radius, mode) => {
+                // Once we encounter a clip-out, we just assume the worst
+                // case clip mask size, for now.
+                if mode == ClipMode::ClipOut {
+                    self.can_calculate_inner_rect = false;
+                    return
+                }
+
+                self.can_calculate_outer_rect = true;
+                self.local_outer = self.local_outer.and_then(|r| r.intersection(rect));
+
+                let inner_rect = extract_inner_rect_safe(rect, radius);
+                self.local_inner = self.local_inner
+                    .and_then(|r| inner_rect.and_then(|ref inner| r.intersection(inner)));
+            }
+            ClipSource::BoxShadow(..) |
+            ClipSource::LineDecoration(..) => {
+                self.can_calculate_inner_rect = false;
+            }
+        }
+    }
+
+    fn finish(self) -> (LayoutRect, Option<LayoutRect>) {
+        (
+            if self.can_calculate_inner_rect {
+                self.local_inner.unwrap_or_else(LayoutRect::zero)
+            } else {
+                LayoutRect::zero()
+            },
+            if self.can_calculate_outer_rect {
+                Some(self.local_outer.unwrap_or_else(LayoutRect::zero))
+            } else {
+                None
+            },
+        )
+    }
+}
+
+
 #[derive(Debug)]
 pub struct ClipSources {
     pub clips: Vec<(ClipSource, GpuCacheHandle)>,
     pub local_inner_rect: LayoutRect,
     pub local_outer_rect: Option<LayoutRect>,
     pub only_rectangular_clips: bool,
     pub has_image_or_line_decoration_clip: bool,
     pub spatial_node_index: SpatialNodeIndex,
 }
 
 impl ClipSources {
-    pub fn new(
-        clips: Vec<ClipSource>,
-        spatial_node_index: SpatialNodeIndex,
-    ) -> Self {
-        let (local_inner_rect, local_outer_rect) = Self::calculate_inner_and_outer_rects(&clips);
+    pub fn new<I>(clip_iter: I, spatial_node_index: SpatialNodeIndex) -> Self
+    where
+        I: IntoIterator<Item = ClipSource>,
+    {
+        let mut clips = Vec::new();
+        let mut bounds_accum = BoundsAccumulator::new();
+        let mut has_image_or_line_decoration_clip = false;
+        let mut only_rectangular_clips = true;
 
-        let has_image_or_line_decoration_clip =
-            clips.iter().any(|clip| clip.is_image_or_line_decoration_clip());
-        let only_rectangular_clips =
-            !has_image_or_line_decoration_clip && clips.iter().all(|clip| clip.is_rect());
-        let clips = clips
-            .into_iter()
-            .map(|clip| (clip, GpuCacheHandle::new()))
-            .collect();
+        for clip in clip_iter {
+            bounds_accum.add(&clip);
+            has_image_or_line_decoration_clip |= clip.is_image_or_line_decoration_clip();
+            only_rectangular_clips &= clip.is_rect();
+            clips.push((clip, GpuCacheHandle::new()));
+        }
+
+        only_rectangular_clips &= !has_image_or_line_decoration_clip;
+        let (local_inner_rect, local_outer_rect) = bounds_accum.finish();
 
         ClipSources {
             clips,
             local_inner_rect,
             local_outer_rect,
             only_rectangular_clips,
             has_image_or_line_decoration_clip,
             spatial_node_index,
         }
     }
 
-    pub fn from_region(
-        region: ClipRegion,
+    pub fn from_region<I>(
+        region: ClipRegion<I>,
         spatial_node_index: SpatialNodeIndex,
-    ) -> ClipSources {
-        let mut clips = Vec::new();
-
-        if let Some(info) = region.image_mask {
-            clips.push(ClipSource::Image(info));
-        }
-
-        clips.push(ClipSource::Rectangle(region.main, ClipMode::Clip));
-
-        for complex in region.complex_clips {
-            clips.push(ClipSource::new_rounded_rect(
+    ) -> ClipSources
+    where
+        I: IntoIterator<Item = ComplexClipRegion>
+    {
+        let clip_rect = iter::once(ClipSource::Rectangle(region.main, ClipMode::Clip));
+        let clip_image = region.image_mask.map(ClipSource::Image);
+        let clips_complex = region.complex_clips
+            .into_iter()
+            .map(|complex| ClipSource::new_rounded_rect(
                 complex.rect,
                 complex.radii,
                 complex.mode,
             ));
-        }
 
-        ClipSources::new(clips, spatial_node_index)
+        let clips_all = clip_rect.chain(clip_image).chain(clips_complex);
+        ClipSources::new(clips_all, spatial_node_index)
     }
 
     pub fn clips(&self) -> &[(ClipSource, GpuCacheHandle)] {
         &self.clips
     }
 
-    fn calculate_inner_and_outer_rects(clips: &Vec<ClipSource>) -> (LayoutRect, Option<LayoutRect>) {
-        if clips.is_empty() {
-            return (LayoutRect::zero(), None);
-        }
-
-        // Depending on the complexity of the clip, we may either know the outer and/or inner
-        // rect, or neither or these.  In the case of a clip-out, we currently set the mask bounds
-        // to be unknown. This is conservative, but ensures correctness. In the future we can make
-        // this a lot more clever with some proper region handling.
-        let mut local_outer = Some(LayoutRect::max_rect());
-        let mut local_inner = local_outer;
-        let mut can_calculate_inner_rect = true;
-        let mut can_calculate_outer_rect = false;
-        for source in clips {
-            match *source {
-                ClipSource::Image(ref mask) => {
-                    if !mask.repeat {
-                        can_calculate_outer_rect = true;
-                        local_outer = local_outer.and_then(|r| r.intersection(&mask.rect));
-                    }
-                    local_inner = None;
-                }
-                ClipSource::Rectangle(rect, mode) => {
-                    // Once we encounter a clip-out, we just assume the worst
-                    // case clip mask size, for now.
-                    if mode == ClipMode::ClipOut {
-                        can_calculate_inner_rect = false;
-                        break;
-                    }
-
-                    can_calculate_outer_rect = true;
-                    local_outer = local_outer.and_then(|r| r.intersection(&rect));
-                    local_inner = local_inner.and_then(|r| r.intersection(&rect));
-                }
-                ClipSource::RoundedRectangle(ref rect, ref radius, mode) => {
-                    // Once we encounter a clip-out, we just assume the worst
-                    // case clip mask size, for now.
-                    if mode == ClipMode::ClipOut {
-                        can_calculate_inner_rect = false;
-                        break;
-                    }
-
-                    can_calculate_outer_rect = true;
-                    local_outer = local_outer.and_then(|r| r.intersection(rect));
-
-                    let inner_rect = extract_inner_rect_safe(rect, radius);
-                    local_inner = local_inner
-                        .and_then(|r| inner_rect.and_then(|ref inner| r.intersection(inner)));
-                }
-                ClipSource::BoxShadow(..) |
-                ClipSource::LineDecoration(..) => {
-                    can_calculate_inner_rect = false;
-                    break;
-                }
-            }
-        }
-
-        let outer = if can_calculate_outer_rect {
-            Some(local_outer.unwrap_or_else(LayoutRect::zero))
-        } else {
-            None
-        };
-
-        let inner = if can_calculate_inner_rect {
-            local_inner.unwrap_or_else(LayoutRect::zero)
-        } else {
-            LayoutRect::zero()
-        };
-
-        (inner, outer)
-    }
-
     pub fn update(
         &mut self,
         gpu_cache: &mut GpuCache,
         resource_cache: &mut ResourceCache,
         device_pixel_scale: DevicePixelScale,
     ) {
         for &mut (ref mut source, ref mut handle) in &mut self.clips {
             if let Some(mut request) = gpu_cache.request(handle) {
@@ -518,17 +566,18 @@ impl ClipSources {
     ) -> (DeviceIntRect, Option<DeviceIntRect>) {
         // If this translation isn't axis aligned or has a perspective component, don't try to
         // calculate the inner rectangle. The rectangle that we produce would include potentially
         // clipped screen area.
         // TODO(mrobinson): We should eventually try to calculate an inner region or some inner
         // rectangle so that we can do screen inner rectangle optimizations for these kind of
         // cilps.
         let can_calculate_inner_rect =
-            transform.preserves_2d_axis_alignment() && !transform.has_perspective_component();
+            transform.kind() == TransformedRectKind::AxisAligned &&
+            !transform.has_perspective_component();
         let screen_inner_rect = if can_calculate_inner_rect {
             calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_scale, screen_rect)
                 .unwrap_or(DeviceIntRect::zero())
         } else {
             DeviceIntRect::zero()
         };
 
         let screen_outer_rect = self.local_outer_rect.map(|outer_rect|
--- a/gfx/webrender/src/clip_node.rs
+++ b/gfx/webrender/src/clip_node.rs
@@ -32,17 +32,17 @@ impl ClipNode {
         &mut self,
         device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         clip_chains: &mut [ClipChain],
         spatial_nodes: &[SpatialNode],
     ) {
-        let clip_sources = clip_store.get_mut(self.clip_sources_index);
+        let clip_sources = &mut clip_store[self.clip_sources_index];
         clip_sources.update(gpu_cache, resource_cache, device_pixel_scale);
         let spatial_node = &spatial_nodes[clip_sources.spatial_node_index.0];
 
         let (screen_inner_rect, screen_outer_rect) = clip_sources.get_screen_bounds(
             &spatial_node.world_content_transform,
             device_pixel_scale,
             None,
         );
@@ -55,20 +55,18 @@ impl ClipNode {
         let local_outer_rect = clip_sources.local_outer_rect
             .expect("Clipping node didn't have outer rect.");
 
         let new_node = ClipChainNode {
             work_item: ClipWorkItem {
                 clip_sources_index: self.clip_sources_index,
                 coordinate_system_id: spatial_node.coordinate_system_id,
             },
-            local_clip_rect: spatial_node
-                .coordinate_system_relative_transform
-                .transform_rect(&local_outer_rect)
-                .expect("clip node transform is not valid"),
+            local_clip_rect: local_outer_rect
+                .translate(&spatial_node.coordinate_system_relative_offset),
             screen_outer_rect,
             screen_inner_rect,
             prev: None,
         };
 
         let mut clip_chain =
             clip_chains[self.parent_clip_chain_index.0]
             .new_with_added_node(&new_node);
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -9,17 +9,17 @@ use clip::{ClipChain, ClipSourcesIndex, 
 use clip_node::ClipNode;
 use gpu_cache::GpuCache;
 use gpu_types::TransformPalette;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use spatial_node::{ScrollFrameInfo, SpatialNode, SpatialNodeType, StickyFrameInfo};
-use util::{LayoutFastTransform, LayoutToWorldFastTransform};
+use util::LayoutToWorldFastTransform;
 
 pub type ScrollStates = FastHashMap<ExternalScrollId, ScrollFrameInfo>;
 
 /// An id that identifies coordinate systems in the ClipScrollTree. Each
 /// coordinate system has an id and those ids will be shared when the coordinates
 /// system are the same or are in the same axis-aligned space. This allows
 /// for optimizing mask generation.
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -98,18 +98,18 @@ pub struct TransformUpdateState {
     pub nearest_scrolling_ancestor_viewport: LayoutRect,
 
     /// An id for keeping track of the axis-aligned space of this node. This is used in
     /// order to to track what kinds of clip optimizations can be done for a particular
     /// display list item, since optimizations can usually only be done among
     /// coordinate systems which are relatively axis aligned.
     pub current_coordinate_system_id: CoordinateSystemId,
 
-    /// Transform from the coordinate system that started this compatible coordinate system.
-    pub coordinate_system_relative_transform: LayoutFastTransform,
+    /// Offset from the coordinate system that started this compatible coordinate system.
+    pub coordinate_system_relative_offset: LayoutVector2D,
 
     /// True if this node is transformed by an invertible transform.  If not, display items
     /// transformed by this node will not be displayed and display items not transformed by this
     /// node will not be clipped by clips that are transformed by this node.
     pub invertible: bool,
 }
 
 impl ClipScrollTree {
@@ -237,17 +237,17 @@ impl ClipScrollTree {
 
         let root_reference_frame_index = self.root_reference_frame_index();
         let mut state = TransformUpdateState {
             parent_reference_frame_transform: LayoutVector2D::new(pan.x, pan.y).into(),
             parent_accumulated_scroll_offset: LayoutVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayoutVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayoutRect::zero(),
             current_coordinate_system_id: CoordinateSystemId::root(),
-            coordinate_system_relative_transform: LayoutFastTransform::identity(),
+            coordinate_system_relative_offset: LayoutVector2D::zero(),
             invertible: true,
         };
 
         let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
         self.update_node(
             root_reference_frame_index,
             &mut state,
             &mut next_coordinate_system_id,
--- a/gfx/webrender/src/device/query_gl.rs
+++ b/gfx/webrender/src/device/query_gl.rs
@@ -66,26 +66,28 @@ impl<T> QuerySet<T> {
 }
 
 pub struct GpuFrameProfile<T> {
     gl: Rc<gl::Gl>,
     timers: QuerySet<GpuTimer<T>>,
     samplers: QuerySet<GpuSampler<T>>,
     frame_id: FrameId,
     inside_frame: bool,
+    ext_debug_marker: bool
 }
 
 impl<T> GpuFrameProfile<T> {
-    fn new(gl: Rc<gl::Gl>) -> Self {
+    fn new(gl: Rc<gl::Gl>, ext_debug_marker: bool) -> Self {
         GpuFrameProfile {
             gl,
             timers: QuerySet::new(),
             samplers: QuerySet::new(),
             frame_id: FrameId::new(0),
             inside_frame: false,
+            ext_debug_marker
         }
     }
 
     fn enable_timers(&mut self, count: i32) {
         self.timers.set = self.gl.gen_queries(count);
     }
 
     fn disable_timers(&mut self) {
@@ -135,17 +137,17 @@ impl<T> GpuFrameProfile<T> {
         }
     }
 }
 
 impl<T: NamedTag> GpuFrameProfile<T> {
     fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
         self.finish_timer();
 
-        let marker = GpuMarker::new(&self.gl, tag.get_label());
+        let marker = GpuMarker::new(&self.gl, tag.get_label(), self.ext_debug_marker);
 
         if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
             self.gl.begin_query(gl::TIME_ELAPSED, query);
         }
 
         GpuTimeQuery(marker)
     }
 
@@ -182,29 +184,31 @@ impl<T> Drop for GpuFrameProfile<T> {
         self.disable_samplers();
     }
 }
 
 pub struct GpuProfiler<T> {
     gl: Rc<gl::Gl>,
     frames: Vec<GpuFrameProfile<T>>,
     next_frame: usize,
+    ext_debug_marker: bool
 }
 
 impl<T> GpuProfiler<T> {
-    pub fn new(gl: Rc<gl::Gl>) -> Self {
+    pub fn new(gl: Rc<gl::Gl>, ext_debug_marker: bool) -> Self {
         const MAX_PROFILE_FRAMES: usize = 4;
         let frames = (0 .. MAX_PROFILE_FRAMES)
-            .map(|_| GpuFrameProfile::new(Rc::clone(&gl)))
+            .map(|_| GpuFrameProfile::new(Rc::clone(&gl), ext_debug_marker))
             .collect();
 
         GpuProfiler {
             gl,
             next_frame: 0,
             frames,
+            ext_debug_marker
         }
     }
 
     pub fn enable_timers(&mut self) {
         const MAX_TIMERS_PER_FRAME: i32 = 256;
 
         for frame in &mut self.frames {
             frame.enable_timers(MAX_TIMERS_PER_FRAME);
@@ -258,42 +262,51 @@ impl<T: NamedTag> GpuProfiler<T> {
         self.frames[self.next_frame].start_sampler(tag)
     }
 
     pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) {
         self.frames[self.next_frame].finish_sampler()
     }
 
     pub fn start_marker(&mut self, label: &str) -> GpuMarker {
-        GpuMarker::new(&self.gl, label)
+        GpuMarker::new(&self.gl, label, self.ext_debug_marker)
     }
 
     pub fn place_marker(&mut self, label: &str) {
-        GpuMarker::fire(&self.gl, label)
+        GpuMarker::fire(&self.gl, label, self.ext_debug_marker)
     }
 }
 
 #[must_use]
 pub struct GpuMarker {
-    gl: Rc<gl::Gl>,
+    gl: Option<Rc<gl::Gl>>
 }
 
 impl GpuMarker {
-    fn new(gl: &Rc<gl::Gl>, message: &str) -> Self {
-        gl.push_group_marker_ext(message);
-        GpuMarker { gl: Rc::clone(gl) }
+    fn new(gl: &Rc<gl::Gl>, message: &str, ext_debug_marker: bool) -> Self {
+        let gl = if ext_debug_marker {
+            gl.push_group_marker_ext(message);            
+            Some(Rc::clone(gl))
+        } else {
+            None
+        };
+        GpuMarker { gl }
     }
 
-    fn fire(gl: &Rc<gl::Gl>, message: &str) {
-        gl.insert_event_marker_ext(message);
+    fn fire(gl: &Rc<gl::Gl>, message: &str, ext_debug_marker: bool) {
+        if ext_debug_marker {
+            gl.insert_event_marker_ext(message);
+        }
     }
 }
 
 impl Drop for GpuMarker {
     fn drop(&mut self) {
-        self.gl.pop_group_marker_ext();
+        if let Some(ref gl) = self.gl {
+            gl.pop_group_marker_ext();
+        }
     }
 }
 
 #[must_use]
 pub struct GpuTimeQuery(GpuMarker);
 #[must_use]
 pub struct GpuSampleQuery;
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -219,32 +219,32 @@ impl<'a> DisplayListFlattener<'a> {
             flattener,
         )
     }
 
     fn get_complex_clips(
         &self,
         pipeline_id: PipelineId,
         complex_clips: ItemRange<ComplexClipRegion>,
-    ) -> Vec<ComplexClipRegion> {
-        if complex_clips.is_empty() {
-            return vec![];
-        }
-        self.scene.get_display_list_for_pipeline(pipeline_id).get(complex_clips).collect()
+    ) -> impl 'a + Iterator<Item = ComplexClipRegion> {
+        //Note: we could make this a bit more complex to early out
+        // on `complex_clips.is_empty()` if it's worth it
+        self.scene
+            .get_display_list_for_pipeline(pipeline_id)
+            .get(complex_clips)
     }
 
     fn get_clip_chain_items(
         &self,
         pipeline_id: PipelineId,
         items: ItemRange<ClipId>,
-    ) -> Vec<ClipId> {
-        if items.is_empty() {
-            return vec![];
-        }
-        self.scene.get_display_list_for_pipeline(pipeline_id).get(items).collect()
+    ) -> impl 'a + Iterator<Item = ClipId> {
+        self.scene
+            .get_display_list_for_pipeline(pipeline_id)
+            .get(items)
     }
 
     fn flatten_root(&mut self, pipeline: &'a ScenePipeline, frame_size: &LayoutSize) {
         let pipeline_id = pipeline.pipeline_id;
         let reference_frame_info = self.simple_scroll_and_clip_chain(
             &ClipId::root_reference_frame(pipeline_id),
         );
 
@@ -680,18 +680,17 @@ impl<'a> DisplayListFlattener<'a> {
                     complex_clips,
                     info.image_mask,
                     &reference_frame_relative_offset,
                 );
                 self.add_clip_node(info.id, clip_and_scroll_ids.scroll_node_id, clip_region);
             }
             SpecificDisplayItem::ClipChain(ref info) => {
                 let items = self.get_clip_chain_items(pipeline_id, item.clip_chain_items())
-                    .iter()
-                    .map(|id| self.id_to_index_mapper.get_clip_node_index(*id))
+                    .map(|id| self.id_to_index_mapper.get_clip_node_index(id))
                     .collect();
                 let parent = match info.parent {
                     Some(id) => Some(
                         self.id_to_index_mapper.get_clip_chain_index(&ClipId::ClipChain(id))
                     ),
                     None => self.pipeline_clip_chain_stack.last().cloned(),
                 };
                 let clip_chain_index =
@@ -1236,22 +1235,25 @@ impl<'a> DisplayListFlattener<'a> {
             Some(ExternalScrollId(0, pipeline_id)),
             pipeline_id,
             &LayoutRect::new(LayoutPoint::zero(), *viewport_size),
             content_size,
             ScrollSensitivity::ScriptAndInputEvents,
         );
     }
 
-    pub fn add_clip_node(
+    pub fn add_clip_node<I>(
         &mut self,
         new_node_id: ClipId,
         parent_id: ClipId,
-        clip_region: ClipRegion,
-    ) -> ClipChainIndex {
+        clip_region: ClipRegion<I>,
+    ) -> ClipChainIndex
+    where
+        I: IntoIterator<Item = ComplexClipRegion>
+    {
         let parent_clip_chain_index = self.id_to_index_mapper.get_clip_chain_index(&parent_id);
         let spatial_node = self.id_to_index_mapper.get_spatial_node_index(parent_id);
 
         let clip_sources = ClipSources::from_region(clip_region, spatial_node);
         let handle = self.clip_store.insert(clip_sources);
 
         let (node_index, clip_chain_index) = self.clip_scroll_tree.add_clip_node(
             parent_clip_chain_index,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -99,40 +99,40 @@ pub struct PictureContext<'a> {
 
 pub struct PictureState {
     pub tasks: Vec<RenderTaskId>,
     pub has_non_root_coord_system: bool,
     pub local_rect_changed: bool,
 }
 
 impl PictureState {
-    pub fn new() -> PictureState {
+    pub fn new() -> Self {
         PictureState {
             tasks: Vec::new(),
             has_non_root_coord_system: false,
             local_rect_changed: false,
         }
     }
 }
 
 pub struct PrimitiveRunContext<'a> {
     pub clip_chain: &'a ClipChain,
     pub scroll_node: &'a SpatialNode,
     pub spatial_node_index: SpatialNodeIndex,
-    pub transform: Transform,
+    pub transform: Transform<'a>,
     pub local_clip_rect: LayoutRect,
 }
 
 impl<'a> PrimitiveRunContext<'a> {
     pub fn new(
         clip_chain: &'a ClipChain,
         scroll_node: &'a SpatialNode,
         spatial_node_index: SpatialNodeIndex,
         local_clip_rect: LayoutRect,
-        transform: Transform,
+        transform: Transform<'a>,
     ) -> Self {
         PrimitiveRunContext {
             clip_chain,
             scroll_node,
             local_clip_rect,
             spatial_node_index,
             transform,
         }
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DevicePoint, DeviceSize, DeviceRect, LayoutRect, LayoutToWorldTransform};
 use api::{PremultipliedColorF, WorldToLayoutTransform};
 use clip_scroll_tree::SpatialNodeIndex;
 use gpu_cache::{GpuCacheAddress, GpuDataRequest};
 use prim_store::{EdgeAaSegmentMask, Transform};
 use render_task::RenderTaskAddress;
-use util::{MatrixHelpers, TransformedRectKind};
+use util::{LayoutToWorldFastTransform, TransformedRectKind};
 
 // Contains type that must exactly match the same structures declared in GLSL.
 
 #[derive(Copy, Clone, Debug)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ZBufferId(i32);
@@ -369,98 +369,119 @@ impl TransformPaletteId {
 }
 
 // The GPU data payload for a transform palette entry.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct TransformData {
-    pub transform: LayoutToWorldTransform,
-    pub inv_transform: WorldToLayoutTransform,
+    transform: LayoutToWorldTransform,
+    inv_transform: WorldToLayoutTransform,
 }
 
 impl TransformData {
-    pub fn invalid() -> Self {
+    fn invalid() -> Self {
         TransformData {
             transform: LayoutToWorldTransform::identity(),
             inv_transform: WorldToLayoutTransform::identity(),
         }
     }
 }
 
 // Extra data stored about each transform palette entry.
 pub struct TransformMetadata {
-    pub transform_kind: TransformedRectKind,
+    transform_kind: TransformedRectKind,
 }
 
 // Stores a contiguous list of TransformData structs, that
 // are ready for upload to the GPU.
 // TODO(gw): For now, this only stores the complete local
 //           to world transform for each spatial node. In
 //           the future, the transform palette will support
 //           specifying a coordinate system that the transform
 //           should be relative to.
 pub struct TransformPalette {
     pub transforms: Vec<TransformData>,
     metadata: Vec<TransformMetadata>,
 }
 
 impl TransformPalette {
-    pub fn new(spatial_node_count: usize) -> TransformPalette {
+    pub fn new(spatial_node_count: usize) -> Self {
         TransformPalette {
             transforms: Vec::with_capacity(spatial_node_count),
             metadata: Vec::with_capacity(spatial_node_count),
         }
     }
 
-    // Set the local -> world transform for a given spatial
-    // node in the transform palette.
-    pub fn set(
-        &mut self,
-        index: SpatialNodeIndex,
-        data: TransformData,
-    ) {
-        let index = index.0 as usize;
-
+    #[inline]
+    fn grow(&mut self, index: SpatialNodeIndex) {
         // Pad the vectors out if they are not long enough to
         // account for this index. This can occur, for instance,
         // when we stop recursing down the CST due to encountering
         // a node with an invalid transform.
-        while index >= self.transforms.len() {
+        while self.transforms.len() <= index.0 as usize {
             self.transforms.push(TransformData::invalid());
             self.metadata.push(TransformMetadata {
                 transform_kind: TransformedRectKind::AxisAligned,
             });
         }
+    }
 
-        // Store the transform itself, along with metadata about it.
-        self.metadata[index] = TransformMetadata {
-            transform_kind: data.transform.transform_kind(),
+    pub fn invalidate(&mut self, index: SpatialNodeIndex) {
+        self.grow(index);
+        self.metadata[index.0 as usize] = TransformMetadata {
+            transform_kind: TransformedRectKind::AxisAligned,
         };
-        self.transforms[index] = data;
+        self.transforms[index.0 as usize] = TransformData::invalid();
+    }
+
+    // Set the local -> world transform for a given spatial
+    // node in the transform palette.
+    pub fn set(
+        &mut self, index: SpatialNodeIndex, fast_transform: &LayoutToWorldFastTransform,
+    ) -> bool {
+        self.grow(index);
+
+        match fast_transform.inverse() {
+            Some(inverted) => {
+                // Store the transform itself, along with metadata about it.
+                self.metadata[index.0 as usize] = TransformMetadata {
+                    transform_kind: fast_transform.kind()
+                };
+                // Write the data that will be made available to the GPU for this node.
+                self.transforms[index.0 as usize] = TransformData {
+                    transform: fast_transform.to_transform().into_owned(),
+                    inv_transform: inverted.to_transform().into_owned(),
+                };
+                true
+            }
+            None => {
+                self.invalidate(index);
+                false
+            }
+        }
     }
 
     // Get the relevant information about a given transform that is
     // used by the CPU code during culling and primitive prep pass.
     // TODO(gw): In the future, it will be possible to specify
     //           a coordinate system id here, to allow retrieving
     //           transforms in the local space of a given spatial node.
     pub fn get_transform(
         &self,
         index: SpatialNodeIndex,
     ) -> Transform {
-        let index = index.0;
-        let transform = &self.transforms[index];
-        let metadata = &self.metadata[index];
+        let data = &self.transforms[index.0 as usize];
+        let metadata = &self.metadata[index.0 as usize];
 
         Transform {
-            m: transform.transform,
+            m: &data.transform,
             transform_kind: metadata.transform_kind,
-            backface_is_visible: transform.transform.is_backface_visible(),
+            backface_is_visible: data.transform.is_backface_visible(),
         }
     }
 
     // Get a transform palette id for the given spatial node.
     // TODO(gw): In the future, it will be possible to specify
     //           a coordinate system id here, to allow retrieving
     //           transforms in the local space of a given spatial node.
     pub fn get_id(
--- a/gfx/webrender/src/hit_test.rs
+++ b/gfx/webrender/src/hit_test.rs
@@ -31,17 +31,17 @@ pub struct HitTestClipNode {
 
     /// A particular point must be inside all of these regions to be considered clipped in
     /// for the purposes of a hit test.
     regions: Vec<HitTestRegion>,
 }
 
 impl HitTestClipNode {
     fn new(node: &ClipNode, clip_store: &ClipStore) -> Self {
-        let clips = clip_store.get(node.clip_sources_index);
+        let clips = &clip_store[node.clip_sources_index];
         let regions = clips.clips().iter().map(|source| {
             match source.0 {
                 ClipSource::Rectangle(ref rect, mode) => HitTestRegion::Rectangle(*rect, mode),
                 ClipSource::RoundedRectangle(ref rect, ref radii, ref mode) =>
                     HitTestRegion::RoundedRectangle(*rect, *radii, *mode),
                 ClipSource::Image(ref mask) => HitTestRegion::Rectangle(mask.rect, ClipMode::Clip),
                 ClipSource::LineDecoration(_) |
                 ClipSource::BoxShadow(_) => {
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -58,18 +58,18 @@ impl ScrollNodeAndClipChain {
     }
 }
 
 // This is CPU-side information about a transform, that is relevant
 // during culling and primitive prep pass. Often it is the same as
 // the information in the clip-scroll tree. However, if we decide
 // to rasterize a picture in local space, then this will be the
 // transform relative to that picture's coordinate system.
-pub struct Transform {
-    pub m: LayoutToWorldTransform,
+pub struct Transform<'a> {
+    pub m: &'a LayoutToWorldTransform,
     pub backface_is_visible: bool,
     pub transform_kind: TransformedRectKind,
 }
 
 #[derive(Debug)]
 pub struct PrimitiveRun {
     pub base_prim_index: PrimitiveIndex,
     pub count: usize,
@@ -1490,17 +1490,19 @@ impl PrimitiveStore {
                 ref mut handle,
                 ref mut task_info,
                 ..
             } = *source {
                 // TODO(gw): When drawing in screen raster mode, we should also incorporate a
                 //           scale factor from the world transform to get an appropriately
                 //           sized border task.
                 let world_scale = LayoutToWorldScale::new(1.0);
-                let scale = world_scale * frame_context.device_pixel_scale;
+                let mut scale = world_scale * frame_context.device_pixel_scale;
+                let max_scale = BorderRenderTaskInfo::get_max_scale(&border.radius);
+                scale.0 = scale.0.min(max_scale.0);
                 let scale_au = Au::from_f32_px(scale.0);
                 let needs_update = scale_au != cache_key.scale;
                 let mut new_segments = Vec::new();
 
                 if needs_update {
                     cache_key.scale = scale_au;
 
                     *task_info = BorderRenderTaskInfo::new(
@@ -1567,17 +1569,17 @@ impl PrimitiveStore {
         {
             metadata.prepared_frame_id = frame_state.render_tasks.frame_id();
         }
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 // The transform only makes sense for screen space rasterization
-                let transform = prim_run_context.scroll_node.world_content_transform.into();
+                let transform = prim_run_context.scroll_node.world_content_transform.to_transform();
                 text.prepare_for_render(
                     frame_context.device_pixel_scale,
                     &transform,
                     pic_context.allow_subpixel_aa,
                     pic_context.display_list,
                     frame_state,
                 );
             }
@@ -2057,17 +2059,17 @@ impl PrimitiveStore {
         };
 
         // Segment the primitive on all the local-space clip sources that we can.
         for clip_item in clips {
             if clip_item.coordinate_system_id != prim_run_context.scroll_node.coordinate_system_id {
                 continue;
             }
 
-            let local_clips = frame_state.clip_store.get(clip_item.clip_sources_index);
+            let local_clips = &frame_state.clip_store[clip_item.clip_sources_index];
             rect_clips_only = rect_clips_only && local_clips.only_rectangular_clips;
 
             // TODO(gw): We can easily extend the segment builder to support these clip sources in
             // the future, but they are rarely used.
             // We must do this check here in case we continue early below.
             if local_clips.has_image_or_line_decoration_clip {
                 clip_mask_kind = BrushClipMaskKind::Global;
             }
@@ -2284,17 +2286,17 @@ impl PrimitiveStore {
                 prim_screen_rect, prim_run_context.clip_chain.combined_outer_screen_rect);
         }
 
         let prim_coordinate_system_id = prim_run_context.scroll_node.coordinate_system_id;
         let transform = &prim_run_context.scroll_node.world_content_transform;
         let extra_clip =  {
             let metadata = &self.cpu_metadata[prim_index.0];
             metadata.clip_sources_index.map(|clip_sources_index| {
-                let prim_clips = frame_state.clip_store.get_mut(clip_sources_index);
+                let prim_clips = &mut frame_state.clip_store[clip_sources_index];
                 prim_clips.update(
                     frame_state.gpu_cache,
                     frame_state.resource_cache,
                     frame_context.device_pixel_scale,
                 );
                 let (screen_inner_rect, screen_outer_rect) = prim_clips.get_screen_bounds(
                     transform,
                     frame_context.device_pixel_scale,
@@ -2959,18 +2961,18 @@ fn get_local_clip_rect_for_nodes(
                     Some(combined_rect) =>
                         combined_rect
                             .intersection(&node.local_clip_rect)
                             .unwrap_or_else(LayoutRect::zero),
                     None => node.local_clip_rect,
                 })
             }
         )
-        .and_then(|local_rect| {
-            scroll_node.coordinate_system_relative_transform.unapply(&local_rect)
+        .map(|local_rect| {
+            local_rect.translate(&-scroll_node.coordinate_system_relative_offset)
         })
 }
 
 impl<'a> GpuDataRequest<'a> {
     // Write the GPU cache data for an individual segment.
     fn write_segment(
         &mut self,
         local_rect: LayoutRect,
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -398,17 +398,17 @@ impl RenderTask {
         // task cache. This allows the blurred box-shadow rect to be cached
         // in the texture cache across frames.
         // TODO(gw): Consider moving this logic outside this function, especially
         //           as we add more clip sources that depend on render tasks.
         // TODO(gw): If this ever shows up in a profile, we could pre-calculate
         //           whether a ClipSources contains any box-shadows and skip
         //           this iteration for the majority of cases.
         for clip_item in &clips {
-            let clip_sources = clip_store.get_mut(clip_item.clip_sources_index);
+            let clip_sources = &mut clip_store[clip_item.clip_sources_index];
             for &mut (ref mut clip, _) in &mut clip_sources.clips {
                 match *clip {
                     ClipSource::BoxShadow(ref mut info) => {
                         let (cache_size, cache_key) = info.cache_key
                             .as_ref()
                             .expect("bug: no cache key set")
                             .clone();
                         let blur_radius_dp = cache_key.blur_radius_dp as f32;
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -1762,17 +1762,18 @@ impl Renderer {
                 enable_render_on_scroll,
             );
             backend.run(backend_profile_counters);
             if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                 thread_listener.thread_stopped(&rb_thread_name);
             }
         })?;
 
-        let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
+        let ext_debug_marker = device.supports_extension("GL_EXT_debug_marker");
+        let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()), ext_debug_marker);
         #[cfg(feature = "capture")]
         let read_fbo = device.create_fbo_for_external_texture(0);
 
         let mut renderer = Renderer {
             result_rx,
             debug_server,
             device,
             active_documents: Vec::new(),
--- a/gfx/webrender/src/spatial_node.rs
+++ b/gfx/webrender/src/spatial_node.rs
@@ -3,19 +3,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ExternalScrollId, LayoutPixel, LayoutPoint, LayoutRect, LayoutSize, LayoutTransform};
 use api::{LayoutVector2D, PipelineId, PropertyBinding, ScrollClamping, ScrollLocation};
 use api::{ScrollSensitivity, StickyOffsetBounds};
 use clip_scroll_tree::{CoordinateSystemId, SpatialNodeIndex, TransformUpdateState};
 use euclid::SideOffsets2D;
-use gpu_types::{TransformData, TransformPalette};
+use gpu_types::TransformPalette;
 use scene::SceneProperties;
-use util::{LayoutFastTransform, LayoutToWorldFastTransform, TransformedRectKind};
+use util::{LayoutFastTransform, LayoutToWorldFastTransform, MatrixHelpers, TransformedRectKind};
 
 #[derive(Clone, Debug)]
 pub enum SpatialNodeType {
     /// A special kind of node that adjusts its position based on the position
     /// of its parent node and a given set of sticky positioning offset bounds.
     /// Sticky positioned is described in the CSS Positioned Layout Module Level 3 here:
     /// https://www.w3.org/TR/css-position-3/#sticky-pos
     StickyFrame(StickyFrameInfo),
@@ -61,17 +61,17 @@ pub struct SpatialNode {
     pub invertible: bool,
 
     /// The axis-aligned coordinate system id of this node.
     pub coordinate_system_id: CoordinateSystemId,
 
     /// The transformation from the coordinate system which established our compatible coordinate
     /// system (same coordinate system id) and us. This can change via scroll offsets and via new
     /// reference frame transforms.
-    pub coordinate_system_relative_transform: LayoutFastTransform,
+    pub coordinate_system_relative_offset: LayoutVector2D,
 }
 
 impl SpatialNode {
     pub fn new(
         pipeline_id: PipelineId,
         parent_index: Option<SpatialNodeIndex>,
         node_type: SpatialNodeType,
     ) -> Self {
@@ -80,17 +80,17 @@ impl SpatialNode {
             world_content_transform: LayoutToWorldFastTransform::identity(),
             transform_kind: TransformedRectKind::AxisAligned,
             parent: parent_index,
             children: Vec::new(),
             pipeline_id,
             node_type,
             invertible: true,
             coordinate_system_id: CoordinateSystemId(0),
-            coordinate_system_relative_transform: LayoutFastTransform::identity(),
+            coordinate_system_relative_offset: LayoutVector2D::zero(),
         }
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_index: SpatialNodeIndex,
         external_id: Option<ExternalScrollId>,
         frame_rect: &LayoutRect,
@@ -199,57 +199,38 @@ impl SpatialNode {
     }
 
     pub fn push_gpu_data(
         &mut self,
         transform_palette: &mut TransformPalette,
         node_index: SpatialNodeIndex,
     ) {
         if !self.invertible {
-            transform_palette.set(node_index, TransformData::invalid());
+            transform_palette.invalidate(node_index);
             return;
         }
 
-        let inv_transform = match self.world_content_transform.inverse() {
-            Some(inverted) => inverted.to_transform(),
-            None => {
-                transform_palette.set(node_index, TransformData::invalid());
-                return;
-            }
-        };
-
-        let data = TransformData {
-            transform: self.world_content_transform.into(),
-            inv_transform,
-        };
-
-        // Write the data that will be made available to the GPU for this node.
-        transform_palette.set(node_index, data);
+        transform_palette.set(node_index, &self.world_content_transform);
     }
 
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
         scene_properties: &SceneProperties,
     ) {
         // If any of our parents was not rendered, we are not rendered either and can just
         // quit here.
         if !state.invertible {
             self.mark_uninvertible();
             return;
         }
 
         self.update_transform(state, next_coordinate_system_id, scene_properties);
-
-        self.transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
-            TransformedRectKind::AxisAligned
-        } else {
-            TransformedRectKind::Complex
-        };
+        self.transform_kind = self.world_content_transform.kind();
 
         // If this node is a reference frame, we check if it has a non-invertible matrix.
         // For non-reference-frames we assume that they will produce only additional
         // translations which should be invertible.
         match self.node_type {
             SpatialNodeType::ReferenceFrame(info) if !info.invertible => {
                 self.mark_uninvertible();
                 return;
@@ -287,23 +268,26 @@ impl SpatialNode {
 
                 info.invertible = self.world_viewport_transform.is_invertible();
                 if !info.invertible {
                     return;
                 }
 
                 // Try to update our compatible coordinate system transform. If we cannot, start a new
                 // incompatible coordinate system.
-                match state.coordinate_system_relative_transform.update(relative_transform) {
-                    Some(offset) => self.coordinate_system_relative_transform = offset,
-                    None => {
-                        self.coordinate_system_relative_transform = LayoutFastTransform::identity();
-                        state.current_coordinate_system_id = *next_coordinate_system_id;
-                        next_coordinate_system_id.advance();
-                    }
+                if relative_transform.is_simple_2d_translation() {
+                    self.coordinate_system_relative_offset =
+                        state.coordinate_system_relative_offset +
+                        LayoutVector2D::new(relative_transform.m41, relative_transform.m42);
+                } else {
+                    // If we break 2D axis alignment or have a perspective component, we need to start a
+                    // new incompatible coordinate system with which we cannot share clips without masking.
+                    self.coordinate_system_relative_offset = LayoutVector2D::zero();
+                    state.current_coordinate_system_id = *next_coordinate_system_id;
+                    next_coordinate_system_id.advance();
                 }
 
                 self.coordinate_system_id = state.current_coordinate_system_id;
             }
             _ => {
                 // We calculate this here to avoid a double-borrow later.
                 let sticky_offset = self.calculate_sticky_offset(
                     &state.nearest_scrolling_ancestor_offset,
@@ -325,18 +309,18 @@ impl SpatialNode {
                 let scroll_offset = self.scroll_offset();
                 self.world_content_transform = if scroll_offset != LayoutVector2D::zero() {
                     self.world_viewport_transform.pre_translate(&scroll_offset)
                 } else {
                     self.world_viewport_transform
                 };
 
                 let added_offset = state.parent_accumulated_scroll_offset + sticky_offset + scroll_offset;
-                self.coordinate_system_relative_transform =
-                    state.coordinate_system_relative_transform.offset(added_offset);
+                self.coordinate_system_relative_offset =
+                    state.coordinate_system_relative_offset + added_offset;
 
                 if let SpatialNodeType::StickyFrame(ref mut info) = self.node_type {
                     info.current_offset = sticky_offset;
                 }
 
                 self.coordinate_system_id = state.current_coordinate_system_id;
             }
         }
@@ -473,18 +457,17 @@ impl SpatialNode {
                 state.parent_accumulated_scroll_offset =
                     scrolling.offset + state.parent_accumulated_scroll_offset;
                 state.nearest_scrolling_ancestor_offset = scrolling.offset;
                 state.nearest_scrolling_ancestor_viewport = scrolling.viewport_rect;
             }
             SpatialNodeType::ReferenceFrame(ref info) => {
                 state.parent_reference_frame_transform = self.world_viewport_transform;
                 state.parent_accumulated_scroll_offset = LayoutVector2D::zero();
-                state.coordinate_system_relative_transform =
-                    self.coordinate_system_relative_transform.clone();
+                state.coordinate_system_relative_offset = self.coordinate_system_relative_offset;
                 let translation = -info.origin_in_parent_reference_frame;
                 state.nearest_scrolling_ancestor_viewport =
                     state.nearest_scrolling_ancestor_viewport
                        .translate(&translation);
             }
         }
     }
 
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -6,16 +6,18 @@ use api::{BorderRadius, DeviceIntPoint, 
 use api::{DevicePoint, DeviceRect, DeviceSize, LayoutPixel, LayoutPoint, LayoutRect, LayoutSize};
 use api::{WorldPixel, WorldPoint, WorldRect};
 use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D};
 use euclid::{TypedTransform2D, TypedTransform3D, TypedVector2D, TypedVector3D};
 use euclid::{HomogeneousVector};
 use num_traits::Zero;
 use plane_split::{Clipper, Plane, Polygon};
 use std::{i32, f32};
+use std::borrow::Cow;
+
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
@@ -481,21 +483,30 @@ impl<Src, Dst> FastTransform<Src, Dst> {
         if transform.is_simple_2d_translation() {
             return FastTransform::Offset(TypedVector2D::new(transform.m41, transform.m42));
         }
         let inverse = transform.inverse();
         let is_2d = transform.is_2d();
         FastTransform::Transform { transform, inverse, is_2d}
     }
 
-    pub fn to_transform(&self) -> TypedTransform3D<f32, Src, Dst> {
+    pub fn kind(&self) -> TransformedRectKind {
         match *self {
-            FastTransform::Offset(offset) =>
-                TypedTransform3D::create_translation(offset.x, offset.y, 0.0),
-            FastTransform::Transform { transform, .. } => transform
+            FastTransform::Offset(_) => TransformedRectKind::AxisAligned,
+            FastTransform::Transform { ref transform, .. } if transform.preserves_2d_axis_alignment() => TransformedRectKind::AxisAligned,
+            FastTransform::Transform { .. } => TransformedRectKind::Complex,
+        }
+    }
+
+    pub fn to_transform(&self) -> Cow<TypedTransform3D<f32, Src, Dst>> {
+        match *self {
+            FastTransform::Offset(offset) => Cow::Owned(
+                TypedTransform3D::create_translation(offset.x, offset.y, 0.0)
+            ),
+            FastTransform::Transform { ref transform, .. } => Cow::Borrowed(transform),
         }
     }
 
     pub fn is_invertible(&self) -> bool {
         match *self {
             FastTransform::Offset(..) => true,
             FastTransform::Transform { ref inverse, .. } => inverse.is_some(),
         }
@@ -524,25 +535,16 @@ impl<Src, Dst> FastTransform<Src, Dst> {
             FastTransform::Offset(ref offset) =>
                 FastTransform::Offset(*offset + *other_offset),
             FastTransform::Transform { transform, .. } =>
                 FastTransform::with_transform(transform.pre_translate(other_offset.to_3d()))
         }
     }
 
     #[inline(always)]
-    pub fn preserves_2d_axis_alignment(&self) -> bool {
-        match *self {
-            FastTransform::Offset(..) => true,
-            FastTransform::Transform { ref transform, .. } =>
-                transform.preserves_2d_axis_alignment(),
-        }
-    }
-
-    #[inline(always)]
     pub fn has_perspective_component(&self) -> bool {
         match *self {
             FastTransform::Offset(..) => false,
             FastTransform::Transform { ref transform, .. } => transform.has_perspective_component(),
         }
     }
 
     #[inline(always)]
@@ -591,27 +593,16 @@ impl<Src, Dst> FastTransform<Src, Dst> {
             FastTransform::Transform { inverse: Some(ref inverse), is_2d: true, .. }  =>
                 inverse.transform_rect(rect),
             FastTransform::Transform { ref transform, is_2d: false, .. } =>
                 Some(transform.inverse_rect_footprint(rect)),
             FastTransform::Transform { inverse: None, .. }  => None,
         }
     }
 
-    #[inline(always)]
-    pub fn offset(&self, new_offset: TypedVector2D<f32, Src>) -> Self {
-        match *self {
-            FastTransform::Offset(offset) => FastTransform::Offset(offset + new_offset),
-            FastTransform::Transform { ref transform, .. } => {
-                let transform = transform.pre_translate(new_offset.to_3d());
-                FastTransform::with_transform(transform)
-            }
-        }
-    }
-
     pub fn post_translate(&self, new_offset: TypedVector2D<f32, Dst>) -> Self {
         match *self {
             FastTransform::Offset(offset) => {
                 let offset = offset.to_untyped() + new_offset.to_untyped();
                 FastTransform::Offset(TypedVector2D::from_untyped(&offset))
             }
             FastTransform::Transform { ref transform, .. } => {
                 let transform = transform.post_translate(new_offset.to_3d());
@@ -630,40 +621,24 @@ impl<Src, Dst> FastTransform<Src, Dst> {
                     transform: inverse,
                     inverse: Some(transform),
                     is_2d
                 }),
             FastTransform::Transform { inverse: None, .. } => None,
 
         }
     }
-
-    pub fn update(&self, transform: TypedTransform3D<f32, Src, Dst>) -> Option<Self> {
-        if transform.is_simple_2d_translation() {
-            Some(self.offset(TypedVector2D::new(transform.m41, transform.m42)))
-        } else {
-            // If we break 2D axis alignment or have a perspective component, we need to start a
-            // new incompatible coordinate system with which we cannot share clips without masking.
-            None
-        }
-    }
 }
 
 impl<Src, Dst> From<TypedTransform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
     fn from(transform: TypedTransform3D<f32, Src, Dst>) -> Self {
         FastTransform::with_transform(transform)
     }
 }
 
-impl<Src, Dst> Into<TypedTransform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
-    fn into(self) -> TypedTransform3D<f32, Src, Dst> {
-        self.to_transform()
-    }
-}
-
 impl<Src, Dst> From<TypedVector2D<f32, Src>> for FastTransform<Src, Dst> {
     fn from(vector: TypedVector2D<f32, Src>) -> Self {
         FastTransform::with_vector(vector)
     }
 }
 
 pub type LayoutFastTransform = FastTransform<LayoutPixel, LayoutPixel>;
 pub type LayoutToWorldFastTransform = FastTransform<LayoutPixel, WorldPixel>;
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -7,22 +7,22 @@ repository = "https://github.com/servo/w
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 serialize = []
 deserialize = []
 
 [dependencies]
-app_units = "0.6"
+app_units = "0.7"
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.2.1"
 ipc-channel = {version = "0.10.0", optional = true}
-euclid = { version = "0.18", features = ["serde"] }
+euclid = { version = "0.19", features = ["serde"] }
 serde = { version = "=1.0.66", features = ["rc"] }
 serde_derive = { version = "=1.0.66", features = ["deserialize_in_place"] }
 serde_bytes = "0.10"
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.6"
 core-graphics = "0.16"
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -118,17 +118,17 @@ pub struct AuxIter<'a, T> {
     data: &'a [u8],
     size: usize,
     _boo: PhantomData<T>,
 }
 
 impl BuiltDisplayListDescriptor {}
 
 impl BuiltDisplayList {
-    pub fn from_data(data: Vec<u8>, descriptor: BuiltDisplayListDescriptor) -> BuiltDisplayList {
+    pub fn from_data(data: Vec<u8>, descriptor: BuiltDisplayListDescriptor) -> Self {
         BuiltDisplayList { data, descriptor }
     }
 
     pub fn into_data(mut self) -> (Vec<u8>, BuiltDisplayListDescriptor) {
         self.descriptor.send_start_time = precise_time_ns();
         (self.data, self.descriptor)
     }
 
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -2,18 +2,18 @@
 name = "webrender_bindings"
 version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
 rayon = "1"
 thread_profiler = "0.1.1"
-euclid = { version = "0.18", features = ["serde"] }
-app_units = "0.6"
+euclid = { version = "0.19", features = ["serde"] }
+app_units = "0.7"
 gleam = "0.6"
 log = "0.4"
 nsstring = { path = "../../servo/support/gecko/nsstring" }
 bincode = "1.0"
 uuid = {version = "0.1.18"}
 fxhash = "0.2.1"
 
 [dependencies.webrender]
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-8a4fe66528aa362721e4048aac3cd5abf7faaf2c
+7a1b919e37d6cd0155077aa90f98cfcdf9fa5bae
--- a/gfx/wrench/Cargo.toml
+++ b/gfx/wrench/Cargo.toml
@@ -5,20 +5,20 @@ authors = ["Vladimir Vukicevic <vladimir
 build = "build.rs"
 license = "MPL-2.0"
 
 [dependencies]
 base64 = "0.6"
 bincode = "1.0"
 byteorder = "1.0"
 env_logger = { version = "0.5", optional = true }
-euclid = "0.18"
+euclid = "0.19"
 gleam = "0.6"
 glutin = "0.17"
-app_units = "0.6"
+app_units = "0.7"
 image = "0.19"
 clap = { version = "2", features = ["yaml"] }
 lazy_static = "1"
 log = "0.4"
 yaml-rust = { git = "https://github.com/vvuk/yaml-rust", features = ["preserve_order"] }
 serde_json = "1.0"
 ron = "0.1.5"
 time = "0.1"