Bug 1372230 part 1 - Move the code behind mozilla::Encoding::ForName() from encoding_rs to encoding_glue.
MozReview-Commit-ID: 6cFRuyWoPhR
--- a/intl/Encoding.h
+++ b/intl/Encoding.h
@@ -30,16 +30,19 @@ class Encoder;
#define ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR mozilla::NotNull<const mozilla::Encoding*>
#define ENCODING_RS_ENCODER mozilla::Encoder
#define ENCODING_RS_DECODER mozilla::Decoder
#include "encoding_rs.h"
extern "C" {
+mozilla::Encoding const*
+mozilla_encoding_for_name(uint8_t const* name, size_t name_len); // Implemented in intl/encoding_glue/src/lib.rs; panics on an unknown name.
+
nsresult
mozilla_encoding_decode_to_nsstring(mozilla::Encoding const** encoding,
uint8_t const* src,
size_t src_len,
nsAString* dst);
nsresult
mozilla_encoding_decode_to_nsstring_with_bom_removal(
@@ -255,17 +258,17 @@ public:
* The motivating use case for this method is interoperability with
* legacy Gecko code that represents encodings as name string instead of
* type-safe `Encoding` objects. Using this method for other purposes is
* most likely the wrong thing to do.
*/
static inline NotNull<const mozilla::Encoding*> ForName(
Span<const char> aName)
{
- return WrapNotNull(encoding_for_name(
+ return WrapNotNull(mozilla_encoding_for_name( // encoding_glue FFI; panics on an unknown name instead of returning null
reinterpret_cast<const uint8_t*>(aName.Elements()), aName.Length()));
}
/**
* Writes the name of this encoding into `aName`.
*
* This name is appropriate to return as-is from the DOM
* `document.characterSet` property.
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -11,16 +11,17 @@
// "top-level directory" in the above notice refers to
// third_party/rust/encoding_rs/.
extern crate encoding_rs;
extern crate nsstring;
extern crate nserror;
use std::slice;
+use std::cmp::Ordering;
use encoding_rs::*;
use nsstring::*;
use nserror::*;
// nsStringBuffer's internal bookkeeping takes 8 bytes from
// the allocation. Plus one for termination.
const NS_CSTRING_OVERHEAD: usize = 9;
@@ -47,16 +48,123 @@ macro_rules! try_dst_set_len {
unsafe {
if $dst.fallible_set_length(needed).is_err() {
return $ret;
}
}
)
}
+static ENCODINGS_SORTED_BY_NAME: [&'static Encoding; 39] = [&GBK_INIT, // Binary-searched by encoding_for_name, so
+ &BIG5_INIT, // the order must match its comparator:
+ &IBM866_INIT, // ascending name length, then name bytes
+ &EUC_JP_INIT, // compared from the last byte to the first.
+ &KOI8_R_INIT, // UTF_8 is not listed here; encoding_for_name
+ &EUC_KR_INIT, // handles its 5-byte name before searching.
+ &KOI8_U_INIT,
+ &GB18030_INIT,
+ &UTF_16BE_INIT,
+ &UTF_16LE_INIT,
+ &SHIFT_JIS_INIT,
+ &MACINTOSH_INIT,
+ &ISO_8859_2_INIT,
+ &ISO_8859_3_INIT,
+ &ISO_8859_4_INIT,
+ &ISO_8859_5_INIT,
+ &ISO_8859_6_INIT,
+ &ISO_8859_7_INIT,
+ &ISO_8859_8_INIT,
+ &ISO_8859_10_INIT,
+ &ISO_8859_13_INIT,
+ &ISO_8859_14_INIT,
+ &WINDOWS_874_INIT,
+ &ISO_8859_15_INIT,
+ &ISO_8859_16_INIT,
+ &ISO_2022_JP_INIT,
+ &REPLACEMENT_INIT,
+ &WINDOWS_1250_INIT,
+ &WINDOWS_1251_INIT,
+ &WINDOWS_1252_INIT,
+ &WINDOWS_1253_INIT,
+ &WINDOWS_1254_INIT,
+ &WINDOWS_1255_INIT,
+ &WINDOWS_1256_INIT,
+ &WINDOWS_1257_INIT,
+ &WINDOWS_1258_INIT,
+ &ISO_8859_8_I_INIT,
+ &X_MAC_CYRILLIC_INIT,
+ &X_USER_DEFINED_INIT];
+
+/// FFI wrapper around `encoding_for_name`: if the `name_len` bytes at `name`
+/// match exactly (case-sensitively; no whitespace removal performed) the name
+/// of an encoding, returns the `const Encoding*` for it. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name strings instead of
+/// type-safe `Encoding` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// `name` must be non-`NULL` even if `name_len` is zero. When `name_len`
+/// is zero, it is OK for `name` to be something non-dereferenceable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `name` and `name_len` don't designate a valid memory block
+/// or if `name` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn mozilla_encoding_for_name(name: *const u8, name_len: usize) -> *const Encoding {
+ let name_slice = ::std::slice::from_raw_parts(name, name_len); // sound only under the caller contract documented above
+ encoding_for_name(name_slice) // the `&'static Encoding` result coerces to the raw pointer return type
+}
+
+/// Looks up an encoding by its name: if the argument matches exactly
+/// (case-sensitively; no whitespace removal performed) the name of an
+/// encoding, returns the `&'static Encoding` for it. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name strings instead of
+/// type-safe `Encoding` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// Available to C/C++ via the `mozilla_encoding_for_name` wrapper.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+#[cfg_attr(feature = "cargo-clippy", allow(match_wild_err_arm))] // the `Err(_)` arm below panics on purpose
+pub fn encoding_for_name(name: &[u8]) -> &'static Encoding {
+ // The length of `"UTF-8"` is unique among encoding names, so the most
+ // common case can be checked first without searching the table.
+ if name.len() == 5 {
+ assert_eq!(name, b"UTF-8", "Bogus encoding name"); // any other 5-byte input is not a name
+ return UTF_8;
+ }
+ match ENCODINGS_SORTED_BY_NAME.binary_search_by(
+ |probe| {
+ let bytes = probe.name().as_bytes();
+ let c = bytes.len().cmp(&name.len()); // primary key: name length
+ if c != Ordering::Equal {
+ return c;
+ }
+ let probe_iter = bytes.iter().rev(); // secondary key: bytes compared
+ let candidate_iter = name.iter().rev(); // from the last one to the first
+ probe_iter.cmp(candidate_iter)
+ }
+ ) {
+ Ok(i) => ENCODINGS_SORTED_BY_NAME[i],
+ Err(_) => panic!("Bogus encoding name"), // no table entry compared equal
+ }
+}
+
#[no_mangle]
pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring(encoding: *mut *const Encoding,
src: *const u8,
src_len: usize,
dst: *mut nsAString)
-> nsresult {
let (rv, enc) = decode_to_nsstring(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst);
*encoding = enc as *const Encoding;
--- a/intl/gtest/TestEncoding.cpp
+++ b/intl/gtest/TestEncoding.cpp
@@ -22,22 +22,113 @@ ENCODING_TEST(ForLabel)
nsAutoCString label(" uTf-8 ");
ASSERT_EQ(Encoding::ForLabel(label), UTF_8_ENCODING);
label.AssignLiteral(" cseucpkdfmTjapanese ");
ASSERT_EQ(Encoding::ForLabel(label), EUC_JP_ENCODING);
}
ENCODING_TEST(ForName)
{
- nsAutoCString encoding("UTF-8");
+ nsAutoCString encoding("GBK"); // Names follow the order of ENCODINGS_SORTED_BY_NAME in encoding_glue, with UTF-8 added.
+ ASSERT_EQ(Encoding::ForName(encoding), GBK_ENCODING);
+ encoding.AssignLiteral("Big5");
+ ASSERT_EQ(Encoding::ForName(encoding), BIG5_ENCODING);
+ encoding.AssignLiteral("UTF-8"); // served by the length-5 check in encoding_for_name, not by the sorted table
ASSERT_EQ(Encoding::ForName(encoding), UTF_8_ENCODING);
+ encoding.AssignLiteral("IBM866");
+ ASSERT_EQ(Encoding::ForName(encoding), IBM866_ENCODING);
encoding.AssignLiteral("EUC-JP");
ASSERT_EQ(Encoding::ForName(encoding), EUC_JP_ENCODING);
+ encoding.AssignLiteral("KOI8-R");
+ ASSERT_EQ(Encoding::ForName(encoding), KOI8_R_ENCODING);
+ encoding.AssignLiteral("EUC-KR");
+ ASSERT_EQ(Encoding::ForName(encoding), EUC_KR_ENCODING);
+ encoding.AssignLiteral("KOI8-U");
+ ASSERT_EQ(Encoding::ForName(encoding), KOI8_U_ENCODING);
+ encoding.AssignLiteral("gb18030");
+ ASSERT_EQ(Encoding::ForName(encoding), GB18030_ENCODING);
+ encoding.AssignLiteral("UTF-16BE");
+ ASSERT_EQ(Encoding::ForName(encoding), UTF_16BE_ENCODING);
+ encoding.AssignLiteral("UTF-16LE");
+ ASSERT_EQ(Encoding::ForName(encoding), UTF_16LE_ENCODING);
+ encoding.AssignLiteral("Shift_JIS");
+ ASSERT_EQ(Encoding::ForName(encoding), SHIFT_JIS_ENCODING);
+ encoding.AssignLiteral("macintosh");
+ ASSERT_EQ(Encoding::ForName(encoding), MACINTOSH_ENCODING);
+ encoding.AssignLiteral("ISO-8859-2");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_2_ENCODING);
+ encoding.AssignLiteral("ISO-8859-3");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_3_ENCODING);
+ encoding.AssignLiteral("ISO-8859-4");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_4_ENCODING);
+ encoding.AssignLiteral("ISO-8859-5");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_5_ENCODING);
+ encoding.AssignLiteral("ISO-8859-6");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_6_ENCODING);
+ encoding.AssignLiteral("ISO-8859-7");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_7_ENCODING);
+ encoding.AssignLiteral("ISO-8859-8");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_8_ENCODING);
+ encoding.AssignLiteral("ISO-8859-10");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_10_ENCODING);
+ encoding.AssignLiteral("ISO-8859-13");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_13_ENCODING);
+ encoding.AssignLiteral("ISO-8859-14");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_14_ENCODING);
+ encoding.AssignLiteral("windows-874");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_874_ENCODING);
+ encoding.AssignLiteral("ISO-8859-15");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_15_ENCODING);
+ encoding.AssignLiteral("ISO-8859-16");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_16_ENCODING);
+ encoding.AssignLiteral("ISO-2022-JP");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_2022_JP_ENCODING);
+ encoding.AssignLiteral("replacement");
+ ASSERT_EQ(Encoding::ForName(encoding), REPLACEMENT_ENCODING);
+ encoding.AssignLiteral("windows-1250");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1250_ENCODING);
+ encoding.AssignLiteral("windows-1251");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1251_ENCODING);
+ encoding.AssignLiteral("windows-1252");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1252_ENCODING);
+ encoding.AssignLiteral("windows-1253");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1253_ENCODING);
+ encoding.AssignLiteral("windows-1254");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1254_ENCODING);
+ encoding.AssignLiteral("windows-1255");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1255_ENCODING);
+ encoding.AssignLiteral("windows-1256");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1256_ENCODING);
+ encoding.AssignLiteral("windows-1257");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1257_ENCODING);
+ encoding.AssignLiteral("windows-1258");
+ ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1258_ENCODING);
+ encoding.AssignLiteral("ISO-8859-8-I");
+ ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_8_I_ENCODING);
+ encoding.AssignLiteral("x-mac-cyrillic");
+ ASSERT_EQ(Encoding::ForName(encoding), X_MAC_CYRILLIC_ENCODING);
+ encoding.AssignLiteral("x-user-defined");
+ ASSERT_EQ(Encoding::ForName(encoding), X_USER_DEFINED_ENCODING);
}
+// Test disabled pending bug 1393711. Each input below is expected to trigger the "Bogus encoding name" panic.
+#if 0
+ENCODING_TEST(BogusName)
+{
+ nsAutoCString encoding("utf-8"); // wrong case: name matching is case-sensitive
+ ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+ encoding.AssignLiteral("ISO-8859-1"); // no such entry in the glue crate's name table
+ ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+ encoding.AssignLiteral("gbk"); // wrong case for "GBK"
+ ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+ encoding.AssignLiteral(" UTF-8 "); // surrounding whitespace is not removed
+ ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+}
+#endif
+
ENCODING_TEST(ForBOM)
{
nsAutoCString data("\xEF\xBB\xBF\x61");
const Encoding* encoding;
size_t bomLength;
Tie(encoding, bomLength) = Encoding::ForBOM(data);
ASSERT_EQ(encoding, UTF_8_ENCODING);
ASSERT_EQ(bomLength, 3U);