Bug 1372230 part 1 - Move the code behind mozilla::Encoding::ForName() from encoding_rs to encoding_glue. draft
author Henri Sivonen <hsivonen@hsivonen.fi>
Fri, 25 Aug 2017 12:37:28 +0300
changeset 654097 29ad6f6fb95bc5ac6c5352d69d3a7790f07128c1
parent 654082 633389558a270c9f650ae44152b437a0093a18e9
child 654098 3eb87c3bd93c2cded07398036ba80037294f5c34
child 654099 ad2c60face1d201372c9eec6028b61f267b5ed96
child 654207 ab8702d1cff312ced30ee8edc405a9c8cc4cee2f
push id 76483
push user bmo:hsivonen@hsivonen.fi
push date Mon, 28 Aug 2017 09:27:08 +0000
bugs 1372230
milestone 57.0a1
Bug 1372230 part 1 - Move the code behind mozilla::Encoding::ForName() from encoding_rs to encoding_glue. MozReview-Commit-ID: 6cFRuyWoPhR
intl/Encoding.h
intl/encoding_glue/src/lib.rs
intl/gtest/TestEncoding.cpp
--- a/intl/Encoding.h
+++ b/intl/Encoding.h
@@ -30,16 +30,19 @@ class Encoder;
 #define ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR mozilla::NotNull<const mozilla::Encoding*>
 #define ENCODING_RS_ENCODER mozilla::Encoder
 #define ENCODING_RS_DECODER mozilla::Decoder
 
 #include "encoding_rs.h"
 
 extern "C" {
 
+mozilla::Encoding const*
+mozilla_encoding_for_name(uint8_t const* name, size_t name_len);
+
 nsresult
 mozilla_encoding_decode_to_nsstring(mozilla::Encoding const** encoding,
                                     uint8_t const* src,
                                     size_t src_len,
                                     nsAString* dst);
 
 nsresult
 mozilla_encoding_decode_to_nsstring_with_bom_removal(
@@ -255,17 +258,17 @@ public:
    * The motivating use case for this method is interoperability with
    * legacy Gecko code that represents encodings as name string instead of
    * type-safe `Encoding` objects. Using this method for other purposes is
    * most likely the wrong thing to do.
    */
   static inline NotNull<const mozilla::Encoding*> ForName(
     Span<const char> aName)
   {
-    return WrapNotNull(encoding_for_name(
+    return WrapNotNull(mozilla_encoding_for_name(
       reinterpret_cast<const uint8_t*>(aName.Elements()), aName.Length()));
   }
 
   /**
    * Writes the name of this encoding into `aName`.
    *
    * This name is appropriate to return as-is from the DOM
    * `document.characterSet` property.
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -11,16 +11,17 @@
 // "top-level directory" in the above notice refers to
 // third_party/rust/encoding_rs/.
 
 extern crate encoding_rs;
 extern crate nsstring;
 extern crate nserror;
 
 use std::slice;
+use std::cmp::Ordering;
 use encoding_rs::*;
 use nsstring::*;
 use nserror::*;
 
 // nsStringBuffer's internal bookkeeping takes 8 bytes from
 // the allocation. Plus one for termination.
 const NS_CSTRING_OVERHEAD: usize = 9;
 
@@ -47,16 +48,123 @@ macro_rules! try_dst_set_len {
     unsafe {
         if $dst.fallible_set_length(needed).is_err() {
             return $ret;
         }
     }
      )
 }
 
+static ENCODINGS_SORTED_BY_NAME: [&'static Encoding; 39] = [&GBK_INIT,
+                                                            &BIG5_INIT,
+                                                            &IBM866_INIT,
+                                                            &EUC_JP_INIT,
+                                                            &KOI8_R_INIT,
+                                                            &EUC_KR_INIT,
+                                                            &KOI8_U_INIT,
+                                                            &GB18030_INIT,
+                                                            &UTF_16BE_INIT,
+                                                            &UTF_16LE_INIT,
+                                                            &SHIFT_JIS_INIT,
+                                                            &MACINTOSH_INIT,
+                                                            &ISO_8859_2_INIT,
+                                                            &ISO_8859_3_INIT,
+                                                            &ISO_8859_4_INIT,
+                                                            &ISO_8859_5_INIT,
+                                                            &ISO_8859_6_INIT,
+                                                            &ISO_8859_7_INIT,
+                                                            &ISO_8859_8_INIT,
+                                                            &ISO_8859_10_INIT,
+                                                            &ISO_8859_13_INIT,
+                                                            &ISO_8859_14_INIT,
+                                                            &WINDOWS_874_INIT,
+                                                            &ISO_8859_15_INIT,
+                                                            &ISO_8859_16_INIT,
+                                                            &ISO_2022_JP_INIT,
+                                                            &REPLACEMENT_INIT,
+                                                            &WINDOWS_1250_INIT,
+                                                            &WINDOWS_1251_INIT,
+                                                            &WINDOWS_1252_INIT,
+                                                            &WINDOWS_1253_INIT,
+                                                            &WINDOWS_1254_INIT,
+                                                            &WINDOWS_1255_INIT,
+                                                            &WINDOWS_1256_INIT,
+                                                            &WINDOWS_1257_INIT,
+                                                            &WINDOWS_1258_INIT,
+                                                            &ISO_8859_8_I_INIT,
+                                                            &X_MAC_CYRILLIC_INIT,
+                                                            &X_USER_DEFINED_INIT];
+
+/// If the argument matches exactly (case-sensitively; no whitespace
+/// removal performed) the name of an encoding, returns
+/// `const Encoding*` representing that encoding. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name strings instead of
+/// type-safe `Encoding` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// `name` must be non-`NULL` even if `name_len` is zero. When `name_len`
+/// is zero, it is OK for `name` to be something non-dereferenceable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `name` and `name_len` don't designate a valid memory block
+/// or if `name` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn mozilla_encoding_for_name(name: *const u8, name_len: usize) -> *const Encoding {
+    let name_slice = ::std::slice::from_raw_parts(name, name_len);
+    encoding_for_name(name_slice)
+}
+
+/// If the argument matches exactly (case-sensitively; no whitespace
+/// removal performed) the name of an encoding, returns
+/// `&'static Encoding` representing that encoding. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name strings instead of
+/// type-safe `Encoding` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// Available to C++ callers via the `mozilla_encoding_for_name` C wrapper.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+#[cfg_attr(feature = "cargo-clippy", allow(match_wild_err_arm))]
+pub fn encoding_for_name(name: &[u8]) -> &'static Encoding {
+    // No other encoding name is 5 bytes long, so the most common case,
+    // `"UTF-8"`, is checked first and is therefore absent from the table.
+    if name.len() == 5 {
+        assert_eq!(name, b"UTF-8", "Bogus encoding name");
+        return UTF_8;
+    }
+    match ENCODINGS_SORTED_BY_NAME.binary_search_by(
+        |probe| {
+            let bytes = probe.name().as_bytes();
+            let c = bytes.len().cmp(&name.len());
+            if c != Ordering::Equal {
+                return c;
+            }
+            let probe_iter = bytes.iter().rev();
+            let candidate_iter = name.iter().rev();
+            probe_iter.cmp(candidate_iter)
+        }
+    ) {
+        Ok(i) => ENCODINGS_SORTED_BY_NAME[i],
+        Err(_) => panic!("Bogus encoding name"),
+    }
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring(encoding: *mut *const Encoding,
                                                              src: *const u8,
                                                              src_len: usize,
                                                              dst: *mut nsAString)
                                                              -> nsresult {
     let (rv, enc) = decode_to_nsstring(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst);
     *encoding = enc as *const Encoding;
--- a/intl/gtest/TestEncoding.cpp
+++ b/intl/gtest/TestEncoding.cpp
@@ -22,22 +22,113 @@ ENCODING_TEST(ForLabel)
   nsAutoCString label("  uTf-8   ");
   ASSERT_EQ(Encoding::ForLabel(label), UTF_8_ENCODING);
   label.AssignLiteral("   cseucpkdfmTjapanese  ");
   ASSERT_EQ(Encoding::ForLabel(label), EUC_JP_ENCODING);
 }
 
 ENCODING_TEST(ForName)
 {
-  nsAutoCString encoding("UTF-8");
+  nsAutoCString encoding("GBK");
+  ASSERT_EQ(Encoding::ForName(encoding), GBK_ENCODING);
+  encoding.AssignLiteral("Big5");
+  ASSERT_EQ(Encoding::ForName(encoding), BIG5_ENCODING);
+  encoding.AssignLiteral("UTF-8");
   ASSERT_EQ(Encoding::ForName(encoding), UTF_8_ENCODING);
+  encoding.AssignLiteral("IBM866");
+  ASSERT_EQ(Encoding::ForName(encoding), IBM866_ENCODING);
   encoding.AssignLiteral("EUC-JP");
   ASSERT_EQ(Encoding::ForName(encoding), EUC_JP_ENCODING);
+  encoding.AssignLiteral("KOI8-R");
+  ASSERT_EQ(Encoding::ForName(encoding), KOI8_R_ENCODING);
+  encoding.AssignLiteral("EUC-KR");
+  ASSERT_EQ(Encoding::ForName(encoding), EUC_KR_ENCODING);
+  encoding.AssignLiteral("KOI8-U");
+  ASSERT_EQ(Encoding::ForName(encoding), KOI8_U_ENCODING);
+  encoding.AssignLiteral("gb18030");
+  ASSERT_EQ(Encoding::ForName(encoding), GB18030_ENCODING);
+  encoding.AssignLiteral("UTF-16BE");
+  ASSERT_EQ(Encoding::ForName(encoding), UTF_16BE_ENCODING);
+  encoding.AssignLiteral("UTF-16LE");
+  ASSERT_EQ(Encoding::ForName(encoding), UTF_16LE_ENCODING);
+  encoding.AssignLiteral("Shift_JIS");
+  ASSERT_EQ(Encoding::ForName(encoding), SHIFT_JIS_ENCODING);
+  encoding.AssignLiteral("macintosh");
+  ASSERT_EQ(Encoding::ForName(encoding), MACINTOSH_ENCODING);
+  encoding.AssignLiteral("ISO-8859-2");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_2_ENCODING);
+  encoding.AssignLiteral("ISO-8859-3");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_3_ENCODING);
+  encoding.AssignLiteral("ISO-8859-4");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_4_ENCODING);
+  encoding.AssignLiteral("ISO-8859-5");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_5_ENCODING);
+  encoding.AssignLiteral("ISO-8859-6");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_6_ENCODING);
+  encoding.AssignLiteral("ISO-8859-7");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_7_ENCODING);
+  encoding.AssignLiteral("ISO-8859-8");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_8_ENCODING);
+  encoding.AssignLiteral("ISO-8859-10");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_10_ENCODING);
+  encoding.AssignLiteral("ISO-8859-13");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_13_ENCODING);
+  encoding.AssignLiteral("ISO-8859-14");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_14_ENCODING);
+  encoding.AssignLiteral("windows-874");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_874_ENCODING);
+  encoding.AssignLiteral("ISO-8859-15");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_15_ENCODING);
+  encoding.AssignLiteral("ISO-8859-16");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_16_ENCODING);
+  encoding.AssignLiteral("ISO-2022-JP");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_2022_JP_ENCODING);
+  encoding.AssignLiteral("replacement");
+  ASSERT_EQ(Encoding::ForName(encoding), REPLACEMENT_ENCODING);
+  encoding.AssignLiteral("windows-1250");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1250_ENCODING);
+  encoding.AssignLiteral("windows-1251");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1251_ENCODING);
+  encoding.AssignLiteral("windows-1252");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1252_ENCODING);
+  encoding.AssignLiteral("windows-1253");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1253_ENCODING);
+  encoding.AssignLiteral("windows-1254");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1254_ENCODING);
+  encoding.AssignLiteral("windows-1255");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1255_ENCODING);
+  encoding.AssignLiteral("windows-1256");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1256_ENCODING);
+  encoding.AssignLiteral("windows-1257");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1257_ENCODING);
+  encoding.AssignLiteral("windows-1258");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1258_ENCODING);
+  encoding.AssignLiteral("ISO-8859-8-I");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_8_I_ENCODING);
+  encoding.AssignLiteral("x-mac-cyrillic");
+  ASSERT_EQ(Encoding::ForName(encoding), X_MAC_CYRILLIC_ENCODING);
+  encoding.AssignLiteral("x-user-defined");
+  ASSERT_EQ(Encoding::ForName(encoding), X_USER_DEFINED_ENCODING);
 }
 
+// Test disabled pending bug 1393711
+#if 0
+ENCODING_TEST(BogusName)
+{
+  nsAutoCString encoding("utf-8");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+  encoding.AssignLiteral("ISO-8859-1");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+  encoding.AssignLiteral("gbk");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+  encoding.AssignLiteral(" UTF-8 ");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+}
+#endif
+
 ENCODING_TEST(ForBOM)
 {
   nsAutoCString data("\xEF\xBB\xBF\x61");
   const Encoding* encoding;
   size_t bomLength;
   Tie(encoding, bomLength) = Encoding::ForBOM(data);
   ASSERT_EQ(encoding, UTF_8_ENCODING);
   ASSERT_EQ(bomLength, 3U);