--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1177,16 +1177,17 @@ dependencies = [
"nsstring 0.1.0",
]
[[package]]
name = "nsstring"
version = "0.1.0"
dependencies = [
"bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nsstring-gtest"
version = "0.1.0"
dependencies = [
"nsstring 0.1.0",
]
--- a/servo/components/style/properties/gecko.mako.rs
+++ b/servo/components/style/properties/gecko.mako.rs
@@ -2131,25 +2131,25 @@ fn static_assert() {
};
let mut refptr = unsafe {
UniqueRefPtr::from_addrefed(
Gecko_NewGridTemplateAreasValue(v.0.areas.len() as u32, v.0.strings.len() as u32, v.0.width))
};
for (servo, gecko) in v.0.areas.into_iter().zip(refptr.mNamedAreas.iter_mut()) {
- gecko.mName.assign_utf8(&*servo.name);
+ gecko.mName.assign_str(&*servo.name);
gecko.mColumnStart = servo.columns.start;
gecko.mColumnEnd = servo.columns.end;
gecko.mRowStart = servo.rows.start;
gecko.mRowEnd = servo.rows.end;
}
for (servo, gecko) in v.0.strings.into_iter().zip(refptr.mTemplates.iter_mut()) {
- gecko.assign_utf8(&*servo);
+ gecko.assign_str(&*servo);
}
self.gecko.mGridTemplateAreas.set_move(refptr.get())
}
pub fn copy_grid_template_areas_from(&mut self, other: &Self) {
unsafe { self.gecko.mGridTemplateAreas.set(&other.gecko.mGridTemplateAreas) }
}
@@ -4138,18 +4138,18 @@ fn static_assert() {
use gecko_bindings::bindings::Gecko_NewStyleQuoteValues;
use gecko_bindings::sugar::refptr::UniqueRefPtr;
let mut refptr = unsafe {
UniqueRefPtr::from_addrefed(Gecko_NewStyleQuoteValues(other.0.len() as u32))
};
for (servo, gecko) in other.0.into_iter().zip(refptr.mQuotePairs.iter_mut()) {
- gecko.first.assign_utf8(&servo.0);
- gecko.second.assign_utf8(&servo.1);
+ gecko.first.assign_str(&servo.0);
+ gecko.second.assign_str(&servo.1);
}
self.gecko.mQuotes.set_move(refptr.get())
}
pub fn copy_quotes_from(&mut self, other: &Self) {
unsafe { self.gecko.mQuotes.set(&other.gecko.mQuotes); }
}
@@ -4728,17 +4728,17 @@ fn static_assert() {
};
(shape | fill, keyword.shape.char(keyword.fill))
},
TextEmphasisStyle::String(ref s) => {
(structs::NS_STYLE_TEXT_EMPHASIS_STYLE_STRING, &**s)
},
};
- self.gecko.mTextEmphasisStyleString.assign_utf8(s);
+ self.gecko.mTextEmphasisStyleString.assign_str(s);
self.gecko.mTextEmphasisStyle = te as u8;
}
pub fn copy_text_emphasis_style_from(&mut self, other: &Self) {
self.clear_text_emphasis_style_if_string();
if other.gecko.mTextEmphasisStyle == structs::NS_STYLE_TEXT_EMPHASIS_STYLE_STRING as u8 {
self.gecko.mTextEmphasisStyleString
.assign(&*other.gecko.mTextEmphasisStyleString)
@@ -4829,17 +4829,17 @@ fn static_assert() {
use gecko_bindings::structs::nsStyleTextOverflowSide;
use values::specified::text::TextOverflowSide;
fn set(side: &mut nsStyleTextOverflowSide, value: &TextOverflowSide) {
let ty = match *value {
TextOverflowSide::Clip => structs::NS_STYLE_TEXT_OVERFLOW_CLIP,
TextOverflowSide::Ellipsis => structs::NS_STYLE_TEXT_OVERFLOW_ELLIPSIS,
TextOverflowSide::String(ref s) => {
- side.mString.assign_utf8(s);
+ side.mString.assign_str(s);
structs::NS_STYLE_TEXT_OVERFLOW_STRING
}
};
side.mType = ty as u8;
}
self.clear_overflow_sides_if_string();
self.gecko.mTextOverflow.mLogicalDirections = v.sides_are_logical;
@@ -5451,17 +5451,17 @@ clip-path
) {
debug_assert!(content_type == eStyleContentType_Counter ||
content_type == eStyleContentType_Counters);
let counter_func = unsafe {
bindings::Gecko_SetCounterFunction(data, content_type).as_mut().unwrap()
};
counter_func.mIdent.assign(name.0.as_slice());
if content_type == eStyleContentType_Counters {
- counter_func.mSeparator.assign_utf8(sep);
+ counter_func.mSeparator.assign_str(sep);
}
style.to_gecko_value(&mut counter_func.mCounterStyle, device);
}
match v {
Content::None |
Content::Normal => {
// Ensure destructors run, otherwise we could leak.
--- a/servo/ports/geckolib/glue.rs
+++ b/servo/ports/geckolib/glue.rs
@@ -2869,17 +2869,17 @@ pub extern "C" fn Servo_DeclarationBlock
pub extern "C" fn Servo_DeclarationBlock_GetNthProperty(
declarations: RawServoDeclarationBlockBorrowed,
index: u32,
result: *mut nsAString,
) -> bool {
read_locked_arc(declarations, |decls: &PropertyDeclarationBlock| {
if let Some(decl) = decls.declarations().get(index as usize) {
let result = unsafe { result.as_mut().unwrap() };
- result.assign_utf8(&decl.id().name());
+ result.assign_str(&decl.id().name());
true
} else {
false
}
})
}
macro_rules! get_property_id_from_property {
--- a/servo/support/gecko/nsstring/Cargo.toml
+++ b/servo/support/gecko/nsstring/Cargo.toml
@@ -5,9 +5,9 @@ authors = ["nobody@mozilla.com"]
license = "MPL-2.0"
description = "Rust bindings to xpcom string types"
[features]
gecko_debug = []
[dependencies]
bitflags = "1.0"
-
+encoding_rs = "0.7.2"
new file mode 100644
--- /dev/null
+++ b/servo/support/gecko/nsstring/src/conversions.rs
@@ -0,0 +1,506 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate encoding_rs;
+
+use super::nsAString;
+use super::nsACString;
+use super::nsCStringLike;
+use super::Latin1StringLike;
+use super::Gecko_FallibleAssignCString;
+
+use conversions::encoding_rs::mem::*;
+use conversions::encoding_rs::Encoding;
+
+/// Destination-size math (`3 * a + 1`) stated in the docs of `convert_utf16_to_utf8()`
+/// for `a` input code units. Returns `None` if the computation overflows `usize`.
+#[inline(always)]
+fn times_three_plus_one(a: usize) -> Option<usize> {
+    a.checked_mul(3)?.checked_add(1)
+}
+
+#[inline(always)]
+fn fallible_times_two(a: usize) -> Result<usize, ()> {
+    a.checked_mul(2).ok_or(()) // `2 * a`; overflow becomes `Err(())` so callers can propagate it with `?`
+}
+
+/// A conversion where the number of code units in the output is potentially
+/// smaller than the number of code units in the input. One extra unit is reserved
+/// because `convert_utf8_to_utf16()` requires `dst.len() >= src.len() + 1`.
+/// Takes the name of the method to be generated, the name of the conversion
+/// function and the type of the input slice.
+macro_rules! shrinking_conversion {
+    ($name:ident,
+     $convert:ident,
+     $other_ty:ty) => (
+        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<(), ()> {
+            let written = {
+                let buffer = unsafe {
+                    self.fallible_maybe_expand_capacity(old_len.checked_add(other.len()).and_then(|n| n.checked_add(1)).ok_or(())?)?
+                };
+                $convert(other, &mut buffer[old_len..])
+            };
+            unsafe {
+                // TODO: Shrink buffer
+                self.fallible_set_length((old_len + written) as u32)?; // fits: bounded by the u32-checked buffer length
+            }
+            Ok(())
+        }
+    )
+}
+
+/// A conversion where the number of code units in the output is always equal
+/// to the number of code units in the input.
+///
+/// Takes the name of the method to be generated, the name of the conversion
+/// function and the type of the input slice. The generated method writes the
+/// conversion of `other` at offset `old_len` and sets the length to `old_len + other.len()`.
+macro_rules! constant_conversion {
+    ($name:ident,
+     $convert:ident,
+     $other_ty:ty) => (
+        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<(), ()> {
+            let needed = old_len.checked_add(other.len()).ok_or(())?; // overflow is reported as `Err(())`
+            {
+                let buffer = unsafe {
+                    self.fallible_maybe_expand_capacity(needed)?
+                };
+                $convert(other, &mut buffer[old_len..]) // output starts right after the retained content
+            }
+            unsafe {
+                // Truncation to u32 is OK, because `fallible_maybe_expand_capacity()`
+                // would have failed if not.
+                self.fallible_set_length(needed as u32)?;
+            }
+            Ok(())
+        }
+    )
+}
+
+/// An intermediate check (`$name`) run before `$impl`: when `other` adapts to an abstract string,
+/// is entirely ASCII and all old data in `self` is discarded (`old_len == 0`), assign it directly
+/// (an `nsStringBuffer` refcount increment instead of a copy); otherwise hand over to `$impl`.
+macro_rules! ascii_copy_avoidance {
+    ($name:ident,
+     $impl:ident,
+     $string_like:ident) => (
+        fn $name<T: $string_like + ?Sized>(&mut self, other: &T, old_len: usize) -> Result<(), ()> {
+            let adapter = other.adapt();
+            let other_slice = adapter.as_ref();
+            let num_ascii = if adapter.is_abstract() && old_len == 0 { // shortcut only for a full replace
+                let up_to = Encoding::ascii_valid_up_to(other_slice);
+                if up_to == other_slice.len() {
+                    // Calling something whose argument can be obtained from
+                    // the adapter rather than an nsStringLike avoids a huge
+                    // lifetime mess by keeping nsStringLike and
+                    // Latin1StringLike free of lifetime interdependencies.
+                    if unsafe { Gecko_FallibleAssignCString(self, other.adapt().as_ptr()) } {
+                        return Ok(());
+                    } else {
+                        return Err(());
+                    }
+                }
+                Some(up_to) // not all ASCII: pass the length of the ASCII prefix to `$impl`
+            } else {
+                None // no scan was done; `$impl` has to find the ASCII prefix itself
+            };
+            self.$impl(other_slice, old_len, num_ascii)
+        }
+    )
+}
+
+impl nsAString {
+
+    // Valid UTF-8 to UTF-16
+
+    shrinking_conversion!(fallible_append_str_impl, convert_str_to_utf16, &str);
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and replace the content
+    /// of this string with the conversion result. Panics ("Out of memory") on failure.
+    pub fn assign_str(&mut self, other: &str) {
+        self.fallible_append_str_impl(other, 0).expect("Out of memory");
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and fallibly replace the
+    /// content of this string with the conversion result. Returns `Err(())` on failure.
+    pub fn fallible_assign_str(&mut self, other: &str) -> Result<(), ()> {
+        self.fallible_append_str_impl(other, 0)
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and append the conversion
+    /// to this string. Panics ("Out of memory") on failure.
+    pub fn append_str(&mut self, other: &str) {
+        let len = self.len();
+        self.fallible_append_str_impl(other, len).expect("Out of memory");
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and fallibly append the
+    /// conversion to this string. Returns `Err(())` on failure.
+    pub fn fallible_append_str(&mut self, other: &str) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_str_impl(other, len)
+    }
+
+    // Potentially-invalid UTF-8 to UTF-16
+
+    shrinking_conversion!(fallible_append_utf8_impl, convert_utf8_to_utf16, &[u8]);
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and replace the
+    /// content of this string with the conversion result. Panics ("Out of memory") on failure.
+    pub fn assign_utf8(&mut self, other: &[u8]) {
+        self.fallible_append_utf8_impl(other, 0).expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and fallibly replace
+    /// the content of this string with the conversion result. Returns `Err(())` on failure.
+    pub fn fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
+        self.fallible_append_utf8_impl(other, 0)
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// append the conversion result to this string. Panics ("Out of memory") on failure.
+    pub fn append_utf8(&mut self, other: &[u8]) {
+        let len = self.len();
+        self.fallible_append_utf8_impl(other, len).expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly append the conversion result to this string. Returns `Err(())` on failure.
+    pub fn fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf8_impl(other, len)
+    }
+
+    // Latin1 to UTF-16
+
+    constant_conversion!(fallible_append_latin1_impl, convert_latin1_to_utf16, &[u8]);
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string into UTF-16
+    /// and replace the content of this string with the result. Panics ("Out of memory") on failure.
+    pub fn assign_latin1(&mut self, other: &[u8]) {
+        self.fallible_append_latin1_impl(other, 0).expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-16 and fallibly replace the content of this string with the
+    /// conversion result. Returns `Err(())` on failure.
+    pub fn fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
+        self.fallible_append_latin1_impl(other, 0)
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string into
+    /// UTF-16 and append the conversion result to this string. Panics ("Out of memory") on failure.
+    pub fn append_latin1(&mut self, other: &[u8]) {
+        let len = self.len();
+        self.fallible_append_latin1_impl(other, len).expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string into
+    /// UTF-16 and fallibly append the conversion result to this string. `Err(())` on failure.
+    pub fn fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_latin1_impl(other, len)
+    }
+
+}
+
+impl nsACString {
+
+ // UTF-16 to UTF-8
+
+    fn fallible_append_utf16_to_utf8_impl(&mut self, other: &[u16], old_len: usize) -> Result<(), ()> {
+        // Converts `other` to UTF-8 at offset `old_len` (0 when assigning). We first size the buffer for
+        // ASCII if the first code unit is ASCII. If that turns out not to be enough, we size for the worst
+        // case given the remaining input. Empty input still truncates on assign; only appends return early.
+        let mut written = 0;
+        {
+            if let Some(first) = other.first() {
+                let (needed, filled, num_ascii) = if *first < 0x80 {
+                    let buffer = unsafe {
+                        self.fallible_maybe_expand_capacity(old_len.checked_add(other.len()).ok_or(())?)?
+                    };
+                    let num_ascii = copy_basic_latin_to_ascii(other, &mut buffer[old_len..]);
+                    let filled = old_len + num_ascii;
+                    let available = buffer.len() - filled;
+                    let left = other.len() - num_ascii;
+                    let needed = times_three_plus_one(left).ok_or(())?;
+                    if needed <= available {
+                        written = num_ascii + convert_utf16_to_utf8(&other[num_ascii..], &mut buffer[filled..]);
+                        (0, 0, 0) // done in place; `needed == 0` skips the second allocation below
+                    } else {
+                        (needed, filled, num_ascii)
+                    }
+                } else {
+                    let needed = times_three_plus_one(other.len()).ok_or(())?;
+                    (needed, old_len, 0) // nothing converted yet: output must start after the existing content
+                };
+                if needed != 0 {
+                    let buffer = unsafe {
+                        self.fallible_maybe_expand_capacity(filled.checked_add(needed).ok_or(())?)?
+                    };
+                    written = num_ascii + convert_utf16_to_utf8(&other[num_ascii..], &mut buffer[filled..]);
+                }
+            } else if old_len != 0 {
+                return Ok(()); // appending nothing is a no-op; an empty assign falls through and truncates
+            }
+        }
+        unsafe {
+            // TODO: Shrink buffer
+            self.fallible_set_length((old_len + written) as u32)?;
+        }
+        Ok(())
+    }
+
+ /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+ /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+ /// replace the content of this string with the conversion result.
+ pub fn assign_utf16_to_utf8(&mut self, other: &[u16]) {
+ self.fallible_append_utf16_to_utf8_impl(other, 0).expect("Out of memory");
+ }
+
+ /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+ /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+ /// fallibly replace the content of this string with the conversion result.
+ pub fn fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
+ self.fallible_append_utf16_to_utf8_impl(other, 0)
+ }
+
+ /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+ /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+ /// append the conversion result to this string.
+ pub fn append_utf16_to_utf8(&mut self, other: &[u16]) {
+ let len = self.len();
+ self.fallible_append_utf16_to_utf8_impl(other, len).expect("Out of memory");
+ }
+
+ /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+ /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+ /// fallibly append the conversion result to this string.
+ pub fn fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
+ let len = self.len();
+ self.fallible_append_utf16_to_utf8_impl(other, len)
+ }
+
+ // UTF-16 to Latin1
+
+ constant_conversion!(fallible_append_utf16_to_latin1_lossy_impl, convert_utf16_to_latin1_lossy, &[u16]);
+
+ /// Convert a UTF-16 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// replace the content of this string with the conversion result.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-16,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
+ self.fallible_append_utf16_to_latin1_lossy_impl(other, 0).expect("Out of memory");
+ }
+
+ /// Convert a UTF-16 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// fallibly replace the content of this string with the conversion result.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-16,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
+ self.fallible_append_utf16_to_latin1_lossy_impl(other, 0)
+ }
+
+ /// Convert a UTF-16 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// append the conversion result to this string.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-16,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn append_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
+ let len = self.len();
+ self.fallible_append_utf16_to_latin1_lossy_impl(other, len).expect("Out of memory");
+ }
+
+ /// Convert a UTF-16 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// fallibly append the conversion result to this string.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-16,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
+ let len = self.len();
+ self.fallible_append_utf16_to_latin1_lossy_impl(other, len)
+ }
+
+ // UTF-8 to Latin1
+
+ ascii_copy_avoidance!(fallible_append_utf8_to_latin1_lossy_check, fallible_append_utf8_to_latin1_lossy_impl, nsCStringLike);
+
+    fn fallible_append_utf8_to_latin1_lossy_impl(&mut self, other: &[u8], old_len: usize, maybe_num_ascii: Option<usize>) -> Result<(), ()> {
+        let num_ascii = maybe_num_ascii.unwrap_or(0); // length of an already-measured ASCII prefix, if any
+        // This may overflow, but if overflow happens here, an overflow also happens where checked.
+        let old_len_plus_num_ascii = old_len + num_ascii;
+        let written = {
+            let buffer = unsafe {
+                self.fallible_maybe_expand_capacity(old_len.checked_add(other.len()).ok_or(())?)?
+            };
+            if num_ascii != 0 {
+                (&mut buffer[old_len..old_len_plus_num_ascii]).copy_from_slice(&other[..num_ascii]); // ASCII prefix is copied verbatim
+            }
+            convert_utf8_to_latin1_lossy(&other[num_ascii..], &mut buffer[old_len_plus_num_ascii..])
+        };
+        unsafe {
+            // TODO: Shrink buffer
+            self.fallible_set_length((old_len_plus_num_ascii + written) as u32)?;
+        }
+        Ok(())
+    }
+
+ /// Convert a UTF-8 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// replace the content of this string with the conversion result.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-8,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
+ self.fallible_append_utf8_to_latin1_lossy_check(other, 0).expect("Out of memory");
+ }
+
+ /// Convert a UTF-8 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// fallibly replace the content of this string with the conversion result.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-8,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
+ self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
+ }
+
+ /// Convert a UTF-8 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// append the conversion result to this string.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-8,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
+ let len = self.len();
+ self.fallible_append_utf8_to_latin1_lossy_check(other, len).expect("Out of memory");
+ }
+
+ /// Convert a UTF-8 string whose all code points are below U+0100 into
+ /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+ /// fallibly append the conversion result to this string.
+ ///
+ /// # Panics
+ ///
+ /// If the input contains code points above U+00FF or is not valid UTF-8,
+ /// panics in debug mode and produces garbage in a memory-safe way in
+ /// release builds. The nature of the garbage may differ based on CPU
+ /// architecture and must not be relied upon.
+ pub fn fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
+ let len = self.len();
+ self.fallible_append_utf8_to_latin1_lossy_check(other, len)
+ }
+
+ // Latin1 to UTF-8 CString
+
+ ascii_copy_avoidance!(fallible_append_latin1_to_utf8_check, fallible_append_latin1_to_utf8_impl, Latin1StringLike);
+
+    fn fallible_append_latin1_to_utf8_impl(&mut self, other: &[u8], old_len: usize, maybe_num_ascii: Option<usize>) -> Result<(), ()> {
+        // Converts Latin1 `other` to UTF-8 at offset `old_len`. We first size the buffer for ASCII. If that
+        // turns out not to be enough, we size for the worst case (two bytes per remaining input byte).
+        // `maybe_num_ascii` is the length of an ASCII prefix the caller already measured (but did not copy).
+        let mut written = 0;
+        {
+            let (needed, filled, num_ascii) = {
+                if let Some(num_ascii) = maybe_num_ascii {
+                    let filled = old_len + num_ascii;
+                    let left = other.len() - num_ascii;
+                    let needed = fallible_times_two(left)?;
+                    (needed, filled, num_ascii)
+                } else {
+                    let buffer = unsafe {
+                        self.fallible_maybe_expand_capacity(old_len.checked_add(other.len()).ok_or(())?)?
+                    };
+                    let num_ascii = copy_ascii_to_ascii(other, &mut buffer[old_len..]);
+                    let filled = old_len + num_ascii;
+                    let available = buffer.len() - filled;
+                    let left = other.len() - num_ascii;
+                    let needed = fallible_times_two(left)?;
+                    if needed <= available {
+                        written = num_ascii + convert_latin1_to_utf8(&other[num_ascii..], &mut buffer[filled..]);
+                        (0, 0, 0) // done in place; `needed == 0` skips the second allocation below
+                    } else {
+                        (needed, filled, num_ascii)
+                    }
+                }
+            };
+            if needed != 0 {
+                let buffer = unsafe { self.fallible_maybe_expand_capacity(filled.checked_add(needed).ok_or(())?)? };
+                // A caller-measured prefix was only counted, never written: copy it before converting the rest.
+                if maybe_num_ascii.is_some() { (&mut buffer[old_len..filled]).copy_from_slice(&other[..num_ascii]); }
+                written = num_ascii + convert_latin1_to_utf8(&other[num_ascii..], &mut buffer[filled..]);
+            }
+        }
+        unsafe {
+            // TODO: Shrink buffer
+            self.fallible_set_length((old_len + written) as u32)?;
+        }
+        Ok(())
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string into UTF-8
+    /// and replace the content of this string with the result. Panics ("Out of memory") on failure.
+    pub fn assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
+        self.fallible_append_latin1_to_utf8_check(other, 0).expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-8 and fallibly replace the content of this string with the
+    /// conversion result. Returns `Err(())` on failure.
+    pub fn fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
+        self.fallible_append_latin1_to_utf8_check(other, 0)
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string into
+    /// UTF-8 and append the conversion result to this string. Panics ("Out of memory") on failure.
+    pub fn append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
+        let len = self.len();
+        self.fallible_append_latin1_to_utf8_check(other, len).expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string into
+    /// UTF-8 and fallibly append the conversion result to this string. `Err(())` on failure.
+    pub fn fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_latin1_to_utf8_check(other, len)
+    }
+
+}
--- a/servo/support/gecko/nsstring/src/lib.rs
+++ b/servo/support/gecko/nsstring/src/lib.rs
@@ -123,16 +123,18 @@ use std::fmt;
use std::marker::PhantomData;
use std::mem;
use std::ops::{Deref, DerefMut};
use std::os::raw::c_void;
use std::slice;
use std::str;
use std::u32;
+mod conversions;
+
///////////////////////////////////
// Internal Implementation Flags //
///////////////////////////////////
mod data_flags {
bitflags! {
// While this has the same layout as u16, it cannot be passed
// over FFI safely as a u16.
@@ -162,16 +164,91 @@ mod class_flags {
use class_flags::ClassFlags;
use data_flags::DataFlags;
////////////////////////////////////
// Generic String Bindings Macros //
////////////////////////////////////
+macro_rules! string_like {
+ {
+ char_t = $char_t: ty;
+
+ AString = $AString: ident;
+ String = $String: ident;
+ Str = $Str: ident;
+
+ StringLike = $StringLike: ident;
+ StringAdapter = $StringAdapter: ident;
+ } => {
+ /// This trait is implemented on types which are `ns[C]String`-like, in
+ /// that they can at very low cost be converted to a borrowed
+ /// `&nsA[C]String`. Unfortunately, the intermediate type
+ /// `ns[C]StringAdapter` is required as well due to types like `&[u8]`
+ /// needing to be (cheaply) wrapped in a `nsCString` on the stack to
+ /// create the `&nsACString`.
+ ///
+ /// This trait is used to DWIM when calling the methods on
+ /// `nsA[C]String`.
+ pub trait $StringLike {
+ fn adapt(&self) -> $StringAdapter;
+ }
+
+ impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T {
+ fn adapt(&self) -> $StringAdapter {
+ <T as $StringLike>::adapt(*self)
+ }
+ }
+
+ impl<'a, T> $StringLike for borrow::Cow<'a, T>
+ where T: $StringLike + borrow::ToOwned + ?Sized {
+ fn adapt(&self) -> $StringAdapter {
+ <T as $StringLike>::adapt(self.as_ref())
+ }
+ }
+
+ impl $StringLike for $AString {
+ fn adapt(&self) -> $StringAdapter {
+ $StringAdapter::Abstract(self)
+ }
+ }
+
+ impl<'a> $StringLike for $Str<'a> {
+ fn adapt(&self) -> $StringAdapter {
+ $StringAdapter::Abstract(self)
+ }
+ }
+
+ impl $StringLike for $String {
+ fn adapt(&self) -> $StringAdapter {
+ $StringAdapter::Abstract(self)
+ }
+ }
+
+ impl $StringLike for [$char_t] {
+ fn adapt(&self) -> $StringAdapter {
+ $StringAdapter::Borrowed($Str::from(self))
+ }
+ }
+
+ impl $StringLike for Vec<$char_t> {
+ fn adapt(&self) -> $StringAdapter {
+ $StringAdapter::Borrowed($Str::from(&self[..]))
+ }
+ }
+
+ impl $StringLike for Box<[$char_t]> {
+ fn adapt(&self) -> $StringAdapter {
+ $StringAdapter::Borrowed($Str::from(&self[..]))
+ }
+ }
+ }
+}
+
macro_rules! define_string_types {
{
char_t = $char_t: ty;
AString = $AString: ident;
String = $String: ident;
Str = $Str: ident;
@@ -181,16 +258,17 @@ macro_rules! define_string_types {
StringRepr = $StringRepr: ident;
drop = $drop: ident;
assign = $assign: ident, $fallible_assign: ident;
take_from = $take_from: ident, $fallible_take_from: ident;
append = $append: ident, $fallible_append: ident;
set_length = $set_length: ident, $fallible_set_length: ident;
begin_writing = $begin_writing: ident, $fallible_begin_writing: ident;
+ fallible_maybe_expand_capacity = $fallible_maybe_expand_capacity: ident;
} => {
/// The representation of a ns[C]String type in C++. This type is
/// used internally by our definition of ns[C]String to ensure layout
/// compatibility with the C++ ns[C]String type.
///
/// This type may also be used in place of a C++ ns[C]String inside of
/// struct definitions which are shared with C++, as it has identical
/// layout to our ns[C]String type.
@@ -370,16 +448,43 @@ macro_rules! define_string_types {
Err(())
} else {
Ok(slice::from_raw_parts_mut(ptr, len))
}
}
}
}
+        /// Unshares the buffer of the string, sets the capacity to the
+        /// allocation size resulting from rounding up `len`. Set the
+        /// length of the string to the rounded-up capacity and returns
+        /// the buffer as a mutable slice.
+        /// A `len` of zero instead truncates the string and returns an empty slice.
+        /// Fails also if the new length doesn't fit in 32 bits.
+        ///
+        /// # Safety
+        ///
+        /// Unsafe because of exposure of uninitialized memory.
+        unsafe fn fallible_maybe_expand_capacity(&mut self, len: usize) -> Result<&mut [$char_t], ()> {
+            if len == 0 {
+                self.fallible_set_length(0)?;
+                // Even an empty slice needs a non-null pointer aligned for `$char_t` (0x1 is misaligned for u16).
+                Ok(slice::from_raw_parts_mut(mem::align_of::<$char_t>() as *mut $char_t, 0))
+            } else if len > u32::max_value() as usize {
+                Err(())
+            } else {
+                let mut len32 = len as u32; // in/out: the callee reports the rounded-up length here
+                let ptr = $fallible_maybe_expand_capacity(self, &mut len32);
+                if ptr.is_null() {
+                    Err(())
+                } else {
+                    Ok(slice::from_raw_parts_mut(ptr, len32 as usize))
+                }
+            }
+        }
}
impl Deref for $AString {
type Target = [$char_t];
fn deref(&self) -> &[$char_t] {
unsafe {
// All $AString values point to a struct prefix which is
// identical to $StringRepr, this we can transmute `self`
@@ -712,76 +817,35 @@ macro_rules! define_string_types {
fn deref(&self) -> &$AString {
match *self {
$StringAdapter::Borrowed(ref s) => s,
$StringAdapter::Abstract(ref s) => s,
}
}
}
- /// This trait is implemented on types which are `ns[C]String`-like, in
- /// that they can at very low cost be converted to a borrowed
- /// `&nsA[C]String`. Unfortunately, the intermediate type
- /// `ns[C]StringAdapter` is required as well due to types like `&[u8]`
- /// needing to be (cheaply) wrapped in a `nsCString` on the stack to
- /// create the `&nsACString`.
- ///
- /// This trait is used to DWIM when calling the methods on
- /// `nsA[C]String`.
- pub trait $StringLike {
- fn adapt(&self) -> $StringAdapter;
- }
-
- impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T {
- fn adapt(&self) -> $StringAdapter {
- <T as $StringLike>::adapt(*self)
- }
- }
-
- impl<'a, T> $StringLike for borrow::Cow<'a, T>
- where T: $StringLike + borrow::ToOwned + ?Sized {
- fn adapt(&self) -> $StringAdapter {
- <T as $StringLike>::adapt(self.as_ref())
+ impl<'a> $StringAdapter<'a> {
+ #[allow(dead_code)]
+ fn is_abstract(&self) -> bool {
+ match *self {
+ $StringAdapter::Borrowed(_) => false,
+ $StringAdapter::Abstract(_) => true,
+ }
}
}
- impl $StringLike for $AString {
- fn adapt(&self) -> $StringAdapter {
- $StringAdapter::Abstract(self)
- }
- }
-
- impl<'a> $StringLike for $Str<'a> {
- fn adapt(&self) -> $StringAdapter {
- $StringAdapter::Abstract(self)
- }
- }
+ string_like! {
+ char_t = $char_t;
- impl $StringLike for $String {
- fn adapt(&self) -> $StringAdapter {
- $StringAdapter::Abstract(self)
- }
- }
+ AString = $AString;
+ String = $String;
+ Str = $Str;
- impl $StringLike for [$char_t] {
- fn adapt(&self) -> $StringAdapter {
- $StringAdapter::Borrowed($Str::from(self))
- }
- }
-
- impl $StringLike for Vec<$char_t> {
- fn adapt(&self) -> $StringAdapter {
- $StringAdapter::Borrowed($Str::from(&self[..]))
- }
- }
-
- impl $StringLike for Box<[$char_t]> {
- fn adapt(&self) -> $StringAdapter {
- $StringAdapter::Borrowed($Str::from(&self[..]))
- }
+ StringLike = $StringLike;
+ StringAdapter = $StringAdapter;
}
}
}
///////////////////////////////////////////
// Bindings for nsCString (u8 char type) //
///////////////////////////////////////////
@@ -798,43 +862,20 @@ define_string_types! {
StringRepr = nsCStringRepr;
drop = Gecko_FinalizeCString;
assign = Gecko_AssignCString, Gecko_FallibleAssignCString;
take_from = Gecko_TakeFromCString, Gecko_FallibleTakeFromCString;
append = Gecko_AppendCString, Gecko_FallibleAppendCString;
set_length = Gecko_SetLengthCString, Gecko_FallibleSetLengthCString;
begin_writing = Gecko_BeginWritingCString, Gecko_FallibleBeginWritingCString;
+ fallible_maybe_expand_capacity = Gecko_FallibleMaybeExpandCapacityCString;
}
impl nsACString {
- pub fn assign_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) {
- self.truncate();
- self.append_utf16(other);
- }
-
- pub fn fallible_assign_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
- self.truncate();
- self.fallible_append_utf16(other)
- }
-
- pub fn append_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) {
- unsafe {
- Gecko_AppendUTF16toCString(self, other.adapt().as_ptr());
- }
- }
-
- pub fn fallible_append_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
- if unsafe { Gecko_FallibleAppendUTF16toCString(self, other.adapt().as_ptr()) } {
- Ok(())
- } else {
- Err(())
- }
- }
-
pub unsafe fn as_str_unchecked(&self) -> &str {
str::from_utf8_unchecked(self)
}
}
impl<'a> From<&'a str> for nsCStr<'a> {
fn from(s: &'a str) -> nsCStr<'a> {
s.as_bytes().into()
@@ -910,16 +951,33 @@ impl nsCStringLike for String {
}
impl nsCStringLike for Box<str> {
fn adapt(&self) -> nsCStringAdapter {
nsCStringAdapter::Borrowed(nsCStr::from(&self[..]))
}
}
+/// This trait is implemented on types which are Latin1 `nsCString`-like,
+/// in that they can at very low cost be converted to a borrowed
+/// `&nsACString` and do not denote UTF-8ness in the Rust type system.
+///
+/// This trait is used to DWIM when calling the methods on
+/// `nsACString`.
+string_like! {
+ char_t = u8;
+
+ AString = nsACString;
+ String = nsCString;
+ Str = nsCStr;
+
+ StringLike = Latin1StringLike;
+ StringAdapter = nsCStringAdapter;
+}
+
///////////////////////////////////////////
// Bindings for nsString (u16 char type) //
///////////////////////////////////////////
define_string_types! {
char_t = u16;
AString = nsAString;
@@ -932,42 +990,17 @@ define_string_types! {
StringRepr = nsStringRepr;
drop = Gecko_FinalizeString;
assign = Gecko_AssignString, Gecko_FallibleAssignString;
take_from = Gecko_TakeFromString, Gecko_FallibleTakeFromString;
append = Gecko_AppendString, Gecko_FallibleAppendString;
set_length = Gecko_SetLengthString, Gecko_FallibleSetLengthString;
begin_writing = Gecko_BeginWritingString, Gecko_FallibleBeginWritingString;
-}
-
-impl nsAString {
- pub fn assign_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
- self.truncate();
- self.append_utf8(other);
- }
-
- pub fn fallible_assign_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
- self.truncate();
- self.fallible_append_utf8(other)
- }
-
- pub fn append_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
- unsafe {
- Gecko_AppendUTF8toString(self, other.adapt().as_ptr());
- }
- }
-
- pub fn fallible_append_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
- if unsafe { Gecko_FallibleAppendUTF8toString(self, other.adapt().as_ptr()) } {
- Ok(())
- } else {
- Err(())
- }
- }
+ fallible_maybe_expand_capacity = Gecko_FallibleMaybeExpandCapacityString;
}
// NOTE: The From impl for a string slice for nsString produces a <'static>
// lifetime, as it allocates.
impl<'a> From<&'a str> for nsString {
fn from(s: &'a str) -> nsString {
s.encode_utf16().collect::<Vec<u16>>().into()
}
@@ -979,17 +1012,17 @@ impl<'a> From<&'a String> for nsString {
}
}
// Support for the write!() macro for writing to nsStrings
impl fmt::Write for nsAString {
fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
// Directly invoke gecko's routines for appending utf8 strings to
// nsAString values, to avoid as much overhead as possible
- self.append_utf8(s);
+ self.append_str(s);
Ok(())
}
}
impl fmt::Display for nsAString {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
fmt::Display::fmt(&String::from_utf16_lossy(&self[..]), f)
}
@@ -1023,35 +1056,31 @@ extern "C" {
fn Gecko_AppendCString(this: *mut nsACString, other: *const nsACString);
fn Gecko_SetLengthCString(this: *mut nsACString, length: u32);
fn Gecko_BeginWritingCString(this: *mut nsACString) -> *mut u8;
fn Gecko_FallibleAssignCString(this: *mut nsACString, other: *const nsACString) -> bool;
fn Gecko_FallibleTakeFromCString(this: *mut nsACString, other: *mut nsACString) -> bool;
fn Gecko_FallibleAppendCString(this: *mut nsACString, other: *const nsACString) -> bool;
fn Gecko_FallibleSetLengthCString(this: *mut nsACString, length: u32) -> bool;
fn Gecko_FallibleBeginWritingCString(this: *mut nsACString) -> *mut u8;
+ fn Gecko_FallibleMaybeExpandCapacityCString(this: *mut nsACString, length: *mut u32) -> *mut u8;
fn Gecko_FinalizeString(this: *mut nsAString);
fn Gecko_AssignString(this: *mut nsAString, other: *const nsAString);
fn Gecko_TakeFromString(this: *mut nsAString, other: *mut nsAString);
fn Gecko_AppendString(this: *mut nsAString, other: *const nsAString);
fn Gecko_SetLengthString(this: *mut nsAString, length: u32);
fn Gecko_BeginWritingString(this: *mut nsAString) -> *mut u16;
fn Gecko_FallibleAssignString(this: *mut nsAString, other: *const nsAString) -> bool;
fn Gecko_FallibleTakeFromString(this: *mut nsAString, other: *mut nsAString) -> bool;
fn Gecko_FallibleAppendString(this: *mut nsAString, other: *const nsAString) -> bool;
fn Gecko_FallibleSetLengthString(this: *mut nsAString, length: u32) -> bool;
fn Gecko_FallibleBeginWritingString(this: *mut nsAString) -> *mut u16;
-
- // Gecko implementation in nsReadableUtils.cpp
- fn Gecko_AppendUTF16toCString(this: *mut nsACString, other: *const nsAString);
- fn Gecko_AppendUTF8toString(this: *mut nsAString, other: *const nsACString);
- fn Gecko_FallibleAppendUTF16toCString(this: *mut nsACString, other: *const nsAString) -> bool;
- fn Gecko_FallibleAppendUTF8toString(this: *mut nsAString, other: *const nsACString) -> bool;
+ fn Gecko_FallibleMaybeExpandCapacityString(this: *mut nsAString, length: *mut u32) -> *mut u16;
}
//////////////////////////////////////
// Repr Validation Helper Functions //
//////////////////////////////////////
pub mod test_helpers {
//! This module only exists to help with ensuring that the layout of the
--- a/xpcom/string/nsReadableUtils.cpp
+++ b/xpcom/string/nsReadableUtils.cpp
@@ -1239,32 +1239,8 @@ AppendUCS4ToUTF16(const uint32_t aSource
NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
if (IS_IN_BMP(aSource)) {
aDest.Append(char16_t(aSource));
} else {
aDest.Append(H_SURROGATE(aSource));
aDest.Append(L_SURROGATE(aSource));
}
}
-
-extern "C" {
-
-void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
-{
- AppendUTF16toUTF8(*aOther, *aThis);
-}
-
-void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
-{
- AppendUTF8toUTF16(*aOther, *aThis);
-}
-
-bool Gecko_FallibleAppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
-{
- return AppendUTF16toUTF8(*aOther, *aThis, mozilla::fallible);
-}
-
-bool Gecko_FallibleAppendUTF8toString(nsAString* aThis, const nsACString* aOther)
-{
- return AppendUTF8toUTF16(*aOther, *aThis, mozilla::fallible);
-}
-
-}
--- a/xpcom/string/nsSubstring.cpp
+++ b/xpcom/string/nsSubstring.cpp
@@ -451,16 +451,21 @@ char* Gecko_BeginWritingCString(nsACStri
return aThis->BeginWriting();
}
char* Gecko_FallibleBeginWritingCString(nsACString* aThis)
{
return aThis->BeginWriting(mozilla::fallible);
}
+char* Gecko_FallibleMaybeExpandCapacityCString(nsACString* aThis, uint32_t* aCapacity)
+{
+ return aThis->MaybeExpandCapacity(aCapacity, mozilla::fallible);
+}
+
void Gecko_FinalizeString(nsAString* aThis)
{
aThis->~nsAString();
}
void Gecko_AssignString(nsAString* aThis, const nsAString* aOther)
{
aThis->Assign(*aOther);
@@ -506,9 +511,14 @@ char16_t* Gecko_BeginWritingString(nsASt
return aThis->BeginWriting();
}
char16_t* Gecko_FallibleBeginWritingString(nsAString* aThis)
{
return aThis->BeginWriting(mozilla::fallible);
}
+char16_t* Gecko_FallibleMaybeExpandCapacityString(nsAString* aThis, uint32_t* aCapacity)
+{
+ return aThis->MaybeExpandCapacity(aCapacity, mozilla::fallible);
+}
+
} // extern "C"
--- a/xpcom/string/nsTSubstring.h
+++ b/xpcom/string/nsTSubstring.h
@@ -589,16 +589,44 @@ public:
SetLength(aNewLength);
}
/**
* buffer access
*/
+ /**
+ * If *aCapacity is larger than the current capacity, allocates a
+ * buffer whose length is at least *aCapacity.
+ *
+ * Sets *aCapacity and the string's length to the actual capacity.
+ *
+ * Returns a pointer to the start of the buffer or nullptr if
+ * allocation failed.
+ *
+ * Note that unlike GetMutableData, this rounds the length up to the
+ * capacity.
+ */
+ inline char_type* MaybeExpandCapacity(size_type* aCapacity, const fallible_t& aFallible)
+ {
+ // SetCapacity unshares a shared buffer even when resizing is not
+ // needed; a false return means the allocation failed.
+ if (!SetCapacity(*aCapacity, aFallible)) {
+ return nullptr;
+ }
+ size_type capacity = Capacity();
+ // SetCapacity doesn't stretch the logical length for us.
+ this->mLength = capacity;
+ *aCapacity = capacity;
+ char_type* ptr = base_string_type::mData;
+ // SetCapacity zero-terminated at intermediate length, not capacity.
+ ptr[capacity] = 0;
+ return ptr;
+ }
/**
* Get a const pointer to the string's internal buffer. The caller
* MUST NOT modify the characters at the returned address.
*
* @returns The length of the buffer in characters.
*/
inline size_type GetData(const char_type** aData) const