author Henri Sivonen <hsivonen@hsivonen.fi>

Tue, 05 Dec 2017 13:33:52 +0200

changeset 707588 c8db529c3c4db825800be65e9609f3ee8344e36a

parent 707519 e56181d42ce2da739b668c33cd92622b5c4fa6f8

child 709006 84346795b9374a3b113e4168f37ea1bf7a435c58

child 709513 abdb267c33222b55729bd52a022f226c60af6201

push id 92169

push user bmo:hsivonen@hsivonen.fi

push date Tue, 05 Dec 2017 15:27:50 +0000

bugs 960957

milestone 59.0a1

xpcom/build/XPCOMInit.cpp file | annotate | diff | comparison | revisions

xpcom/io/nsNativeCharsetUtils.cpp file | annotate | diff | comparison | revisions

xpcom/io/nsNativeCharsetUtils.h file | annotate | diff | comparison | revisions
--- a/xpcom/build/XPCOMInit.cpp
+++ b/xpcom/build/XPCOMInit.cpp
@@ -539,20 +539,16 @@ NS_InitXPCOM2(nsIServiceManager** aResul
 #ifndef ANDROID
   // If the locale hasn't already been setup by our embedder,
   // get us out of the "C" locale and into the system
   if (strcmp(setlocale(LC_ALL, nullptr), "C") == 0) {
     setlocale(LC_ALL, "");
   }
 #endif
 
-#if defined(XP_UNIX)
-  NS_StartupNativeCharsetUtils();
-#endif
-
   NS_StartupLocalFile();
 
   nsDirectoryService::RealInit();
 
   bool value;
 
   if (aBinDirectory) {
     rv = aBinDirectory->IsDirectory(&value);
@@ -978,19 +974,16 @@ ShutdownXPCOM(nsIServiceManager* aServMg
 #ifdef XP_MACOSX
     mozilla::OnlyReportDirtyWrites();
 #endif /* XP_MACOSX */
     mozilla::BeginLateWriteChecks();
   }
 
   // Shutdown nsLocalFile string conversion
   NS_ShutdownLocalFile();
-#ifdef XP_UNIX
-  NS_ShutdownNativeCharsetUtils();
-#endif
 
   // Shutdown xpcom. This will release all loaders and cause others holding
   // a refcount to the component manager to release it.
   if (nsComponentManagerImpl::gComponentManager) {
     rv = (nsComponentManagerImpl::gComponentManager)->Shutdown();
     NS_ASSERTION(NS_SUCCEEDED(rv), "Component Manager shutdown failed.");
   } else {
     NS_WARNING("Component Manager was never created ...");
--- a/xpcom/io/nsNativeCharsetUtils.cpp
+++ b/xpcom/io/nsNativeCharsetUtils.cpp
@@ -2,19 +2,19 @@
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "xpcom-private.h"
 
 //-----------------------------------------------------------------------------
-// XP_MACOSX or ANDROID
+// Non-Windows
 //-----------------------------------------------------------------------------
-#if defined(XP_MACOSX) || defined(ANDROID)
+#ifndef XP_WIN
 
 #include "nsAString.h"
 #include "nsReadableUtils.h"
 #include "nsString.h"
 
 nsresult
 NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
 {
@@ -24,886 +24,20 @@ NS_CopyNativeToUnicode(const nsACString&
 
 nsresult
 NS_CopyUnicodeToNative(const nsAString&  aInput, nsACString& aOutput)
 {
   CopyUTF16toUTF8(aInput, aOutput);
   return NS_OK;
 }
 
-void
-NS_StartupNativeCharsetUtils()
-{
-}
-
-void
-NS_ShutdownNativeCharsetUtils()
-{
-}
-
-
-//-----------------------------------------------------------------------------
-// XP_UNIX
-//-----------------------------------------------------------------------------
-#elif defined(XP_UNIX)
-
-#include <stdlib.h>   // mbtowc, wctomb
-#include <locale.h>   // setlocale
-#include "mozilla/Mutex.h"
-#include "nscore.h"
-#include "nsAString.h"
-#include "nsReadableUtils.h"
-
-using namespace mozilla;
-
-//
-// choose a conversion library.  we used to use mbrtowc/wcrtomb under Linux,
-// but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
-// or not (see bug 206811 and
-// news://news.mozilla.org:119/bajml3$fvr1@ripley.netscape.com). we now use
-// iconv for all platforms where nltypes.h and nllanginfo.h are present
-// along with iconv.
-//
-#if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
-#define USE_ICONV 1
-#else
-#define USE_STDCONV 1
-#endif
-
-static void
-isolatin1_to_utf16(const char** aInput, uint32_t* aInputLeft,
-                   char16_t** aOutput, uint32_t* aOutputLeft)
-{
-  while (*aInputLeft && *aOutputLeft) {
-    **aOutput = (unsigned char)** aInput;
-    (*aInput)++;
-    (*aInputLeft)--;
-    (*aOutput)++;
-    (*aOutputLeft)--;
-  }
-}
-
-static void
-utf16_to_isolatin1(const char16_t** aInput, uint32_t* aInputLeft,
-                   char** aOutput, uint32_t* aOutputLeft)
-{
-  while (*aInputLeft && *aOutputLeft) {
-    **aOutput = (unsigned char)**aInput;
-    (*aInput)++;
-    (*aInputLeft)--;
-    (*aOutput)++;
-    (*aOutputLeft)--;
-  }
-}
-
-//-----------------------------------------------------------------------------
-// conversion using iconv
-//-----------------------------------------------------------------------------
-#if defined(USE_ICONV)
-#include <nl_types.h> // CODESET
-#include <langinfo.h> // nl_langinfo
-#include <iconv.h>    // iconv_open, iconv, iconv_close
-#include <errno.h>
-#include "plstr.h"
-
-#if defined(HAVE_ICONV_WITH_CONST_INPUT)
-#define ICONV_INPUT(x) (x)
-#else
-#define ICONV_INPUT(x) ((char **)x)
-#endif
-
-// solaris definitely needs this, but we'll enable it by default
-// just in case... but we know for sure that iconv(3) in glibc
-// doesn't need this.
-#if !defined(__GLIBC__)
-#define ENABLE_UTF8_FALLBACK_SUPPORT
-#endif
-
-#define INVALID_ICONV_T ((iconv_t)-1)
-
-static inline size_t
-xp_iconv(iconv_t converter,
-         const char** aInput, size_t* aInputLeft,
-         char** aOutput, size_t* aOutputLeft)
-{
-  size_t res, outputAvail = *aOutputLeft;
-  res = iconv(converter, ICONV_INPUT(aInput), aInputLeft, aOutput, aOutputLeft);
-  if (res == (size_t)-1) {
-    // on some platforms (e.g., linux) iconv will fail with
-    // E2BIG if it cannot convert _all_ of its input.  it'll
-    // still adjust all of the in/out params correctly, so we
-    // can ignore this error.  the assumption is that we will
-    // be called again to complete the conversion.
-    if ((errno == E2BIG) && (*aOutputLeft < outputAvail)) {
-      res = 0;
-    }
-  }
-  return res;
-}
-
-static inline void
-xp_iconv_reset(iconv_t converter)
-{
-  // NOTE: the man pages on Solaris claim that you can pass nullptr
-  // for all parameter to reset the converter, but beware the
-  // evil Solaris crash if you go down this route >:-)
-
-  const char* zero_char_in_ptr  = nullptr;
-  char* zero_char_out_ptr = nullptr;
-  size_t zero_size_in = 0;
-  size_t zero_size_out = 0;
-
-  xp_iconv(converter,
-           &zero_char_in_ptr,
-           &zero_size_in,
-           &zero_char_out_ptr,
-           &zero_size_out);
-}
-
-static inline iconv_t
-xp_iconv_open(const char** to_list, const char** from_list)
-{
-  iconv_t res;
-  const char** from_name;
-  const char** to_name;
-
-  // try all possible combinations to locate a converter.
-  to_name = to_list;
-  while (*to_name) {
-    if (**to_name) {
-      from_name = from_list;
-      while (*from_name) {
-        if (**from_name) {
-          res = iconv_open(*to_name, *from_name);
-          if (res != INVALID_ICONV_T) {
-            return res;
-          }
-        }
-        from_name++;
-      }
-    }
-    to_name++;
-  }
-
-  return INVALID_ICONV_T;
-}
-
-/*
- * char16_t[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we
- * have to use UTF-16 with iconv(3) on platforms where it's supported.
- * However, the way UTF-16 and UCS-2 are interpreted varies across platforms
- * and implementations of iconv(3). On Tru64, it also depends on the environment
- * variable. To avoid the trouble arising from byte-swapping
- * (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling
- * back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2
- * on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness,
- * which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE'
- * and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment
- * variable ICONV_BYTEORDER is set to 'big-endian', about which not much
- * can be done other than adding a note in the release notes. (bug 206811)
- */
-static const char* UTF_16_NAMES[] = {
-#if defined(IS_LITTLE_ENDIAN)
-  "UTF-16LE",
-#if defined(__GLIBC__)
-  "UNICODELITTLE",
-#endif
-  "UCS-2LE",
-#else
-  "UTF-16BE",
-#if defined(__GLIBC__)
-  "UNICODEBIG",
-#endif
-  "UCS-2BE",
-#endif
-  "UTF-16",
-  "UCS-2",
-  "UCS2",
-  "UCS_2",
-  "ucs-2",
-  "ucs2",
-  "ucs_2",
-  nullptr
-};
-
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-static const char* UTF_8_NAMES[] = {
-  "UTF-8",
-  "UTF8",
-  "UTF_8",
-  "utf-8",
-  "utf8",
-  "utf_8",
-  nullptr
-};
-#endif
-
-static const char* ISO_8859_1_NAMES[] = {
-  "ISO-8859-1",
-#if !defined(__GLIBC__)
-  "ISO8859-1",
-  "ISO88591",
-  "ISO_8859_1",
-  "ISO8859_1",
-  "iso-8859-1",
-  "iso8859-1",
-  "iso88591",
-  "iso_8859_1",
-  "iso8859_1",
-#endif
-  nullptr
-};
-
-class nsNativeCharsetConverter
-{
-public:
-  nsNativeCharsetConverter();
-  ~nsNativeCharsetConverter();
-
-  nsresult NativeToUnicode(const char** aInput, uint32_t* aInputLeft,
-                           char16_t** aOutput, uint32_t* aOutputLeft);
-  nsresult UnicodeToNative(const char16_t** aInput, uint32_t* aInputLeft,
-                           char** aOutput, uint32_t* aOutputLeft);
-
-  static void GlobalInit();
-  static void GlobalShutdown();
-  static bool IsNativeUTF8();
-
-private:
-  static iconv_t gNativeToUnicode;
-  static iconv_t gUnicodeToNative;
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  static iconv_t gNativeToUTF8;
-  static iconv_t gUTF8ToNative;
-  static iconv_t gUnicodeToUTF8;
-  static iconv_t gUTF8ToUnicode;
-#endif
-  static Mutex*  gLock;
-  static bool    gInitialized;
-  static bool    gIsNativeUTF8;
-
-  static void LazyInit();
-
-  static void Lock()
-  {
-    if (gLock) {
-      gLock->Lock();
-    }
-  }
-  static void Unlock()
-  {
-    if (gLock) {
-      gLock->Unlock();
-    }
-  }
-};
-
-iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-iconv_t nsNativeCharsetConverter::gNativeToUTF8    = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUTF8ToNative    = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUnicodeToUTF8   = INVALID_ICONV_T;
-iconv_t nsNativeCharsetConverter::gUTF8ToUnicode   = INVALID_ICONV_T;
-#endif
-Mutex*  nsNativeCharsetConverter::gLock            = nullptr;
-bool    nsNativeCharsetConverter::gInitialized     = false;
-bool    nsNativeCharsetConverter::gIsNativeUTF8    = false;
-
-void
-nsNativeCharsetConverter::LazyInit()
-{
-  // LazyInit may be called before NS_StartupNativeCharsetUtils, but
-  // the setlocale it does has to be called before nl_langinfo. Like in
-  // NS_StartupNativeCharsetUtils, assume we are called early enough that
-  // we are the first to care about the locale's charset.
-  if (!gLock) {
-    setlocale(LC_CTYPE, "");
-  }
-  const char* blank_list[] = { "", nullptr };
-  const char** native_charset_list = blank_list;
-  const char* native_charset = nl_langinfo(CODESET);
-  if (!native_charset) {
-    NS_ERROR("native charset is unknown");
-    // fallback to ISO-8859-1
-    native_charset_list = ISO_8859_1_NAMES;
-  } else {
-    native_charset_list[0] = native_charset;
-  }
-
-  // Most, if not all, Unixen supporting UTF-8 and nl_langinfo(CODESET)
-  // return 'UTF-8' (or 'utf-8')
-  if (!PL_strcasecmp(native_charset, "UTF-8")) {
-    gIsNativeUTF8 = true;
-  }
-
-  gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
-  gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
-
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  if (gNativeToUnicode == INVALID_ICONV_T) {
-    gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
-    gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
-    NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
-    NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
-  }
-  if (gUnicodeToNative == INVALID_ICONV_T) {
-    gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
-    gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
-    NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
-    NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
-  }
-#else
-  NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
-  NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
-#endif
-
-  /*
-   * On Solaris 8 (and newer?), the iconv modules converting to UCS-2
-   * prepend a byte order mark unicode character (BOM, u+FEFF) during
-   * the first use of the iconv converter. The same is the case of
-   * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
-   * However, we use 'UTF-16LE/BE' in both cases, instead so that we
-   * should be safe. But just in case...
-   *
-   * This dummy conversion gets rid of the BOMs and fixes bug 153562.
-   */
-  char dummy_input[1] = { ' ' };
-  char dummy_output[4];
-
-  if (gNativeToUnicode != INVALID_ICONV_T) {
-    const char* input = dummy_input;
-    size_t input_left = sizeof(dummy_input);
-    char* output = dummy_output;
-    size_t output_left = sizeof(dummy_output);
-
-    xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
-  }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  if (gUTF8ToUnicode != INVALID_ICONV_T) {
-    const char* input = dummy_input;
-    size_t input_left = sizeof(dummy_input);
-    char* output = dummy_output;
-    size_t output_left = sizeof(dummy_output);
-
-    xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
-  }
-#endif
-
-  gInitialized = true;
-}
-
-void
-nsNativeCharsetConverter::GlobalInit()
-{
-  gLock = new Mutex("nsNativeCharsetConverter.gLock");
-}
-
-void
-nsNativeCharsetConverter::GlobalShutdown()
-{
-  delete gLock;
-  gLock = nullptr;
-
-  if (gNativeToUnicode != INVALID_ICONV_T) {
-    iconv_close(gNativeToUnicode);
-    gNativeToUnicode = INVALID_ICONV_T;
-  }
-
-  if (gUnicodeToNative != INVALID_ICONV_T) {
-    iconv_close(gUnicodeToNative);
-    gUnicodeToNative = INVALID_ICONV_T;
-  }
-
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  if (gNativeToUTF8 != INVALID_ICONV_T) {
-    iconv_close(gNativeToUTF8);
-    gNativeToUTF8 = INVALID_ICONV_T;
-  }
-  if (gUTF8ToNative != INVALID_ICONV_T) {
-    iconv_close(gUTF8ToNative);
-    gUTF8ToNative = INVALID_ICONV_T;
-  }
-  if (gUnicodeToUTF8 != INVALID_ICONV_T) {
-    iconv_close(gUnicodeToUTF8);
-    gUnicodeToUTF8 = INVALID_ICONV_T;
-  }
-  if (gUTF8ToUnicode != INVALID_ICONV_T) {
-    iconv_close(gUTF8ToUnicode);
-    gUTF8ToUnicode = INVALID_ICONV_T;
-  }
-#endif
-
-  gInitialized = false;
-}
-
-nsNativeCharsetConverter::nsNativeCharsetConverter()
-{
-  Lock();
-  if (!gInitialized) {
-    LazyInit();
-  }
-}
-
-nsNativeCharsetConverter::~nsNativeCharsetConverter()
-{
-  // reset converters for next time
-  if (gNativeToUnicode != INVALID_ICONV_T) {
-    xp_iconv_reset(gNativeToUnicode);
-  }
-  if (gUnicodeToNative != INVALID_ICONV_T) {
-    xp_iconv_reset(gUnicodeToNative);
-  }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  if (gNativeToUTF8 != INVALID_ICONV_T) {
-    xp_iconv_reset(gNativeToUTF8);
-  }
-  if (gUTF8ToNative != INVALID_ICONV_T) {
-    xp_iconv_reset(gUTF8ToNative);
-  }
-  if (gUnicodeToUTF8 != INVALID_ICONV_T) {
-    xp_iconv_reset(gUnicodeToUTF8);
-  }
-  if (gUTF8ToUnicode != INVALID_ICONV_T) {
-    xp_iconv_reset(gUTF8ToUnicode);
-  }
-#endif
-  Unlock();
-}
-
-nsresult
-nsNativeCharsetConverter::NativeToUnicode(const char** aInput,
-                                          uint32_t* aInputLeft,
-                                          char16_t** aOutput,
-                                          uint32_t* aOutputLeft)
-{
-  size_t res = 0;
-  size_t inLeft = (size_t)*aInputLeft;
-  size_t outLeft = (size_t)*aOutputLeft * 2;
-
-  if (gNativeToUnicode != INVALID_ICONV_T) {
-
-    res = xp_iconv(gNativeToUnicode, aInput, &inLeft, (char**)aOutput, &outLeft);
-
-    *aInputLeft = inLeft;
-    *aOutputLeft = outLeft / 2;
-    if (res != (size_t)-1) {
-      return NS_OK;
-    }
-
-    NS_WARNING("conversion from native to utf-16 failed");
-
-    // reset converter
-    xp_iconv_reset(gNativeToUnicode);
-  }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
-           (gUTF8ToUnicode != INVALID_ICONV_T)) {
-    // convert first to UTF8, then from UTF8 to UCS2
-    const char* in = *aInput;
-
-    char ubuf[1024];
-
-    // we assume we're always called with enough space in |aOutput|,
-    // so convert many chars at a time...
-    while (inLeft) {
-      char* p = ubuf;
-      size_t n = sizeof(ubuf);
-      res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
-      if (res == (size_t)-1) {
-        NS_ERROR("conversion from native to utf-8 failed");
-        break;
-      }
-      NS_ASSERTION(outLeft > 0, "bad assumption");
-      p = ubuf;
-      n = sizeof(ubuf) - n;
-      res = xp_iconv(gUTF8ToUnicode, (const char**)&p, &n,
-                     (char**)aOutput, &outLeft);
-      if (res == (size_t)-1) {
-        NS_ERROR("conversion from utf-8 to utf-16 failed");
-        break;
-      }
-    }
-
-    (*aInput) += (*aInputLeft - inLeft);
-    *aInputLeft = inLeft;
-    *aOutputLeft = outLeft / 2;
-
-    if (res != (size_t)-1) {
-      return NS_OK;
-    }
-
-    // reset converters
-    xp_iconv_reset(gNativeToUTF8);
-    xp_iconv_reset(gUTF8ToUnicode);
-  }
-#endif
-
-  // fallback: zero-pad and hope for the best
-  // XXX This is lame and we have to do better.
-  isolatin1_to_utf16(aInput, aInputLeft, aOutput, aOutputLeft);
-
-  return NS_OK;
-}
-
-nsresult
-nsNativeCharsetConverter::UnicodeToNative(const char16_t** aInput,
-                                          uint32_t* aInputLeft,
-                                          char** aOutput,
-                                          uint32_t* aOutputLeft)
-{
-  size_t res = 0;
-  size_t inLeft = (size_t)*aInputLeft * 2;
-  size_t outLeft = (size_t)*aOutputLeft;
-
-  if (gUnicodeToNative != INVALID_ICONV_T) {
-    res = xp_iconv(gUnicodeToNative, (const char**)aInput, &inLeft,
-                   aOutput, &outLeft);
-
-    *aInputLeft = inLeft / 2;
-    *aOutputLeft = outLeft;
-    if (res != (size_t)-1) {
-      return NS_OK;
-    }
-
-    NS_ERROR("iconv failed");
-
-    // reset converter
-    xp_iconv_reset(gUnicodeToNative);
-  }
-#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
-  else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
-           (gUTF8ToNative != INVALID_ICONV_T)) {
-    const char* in = (const char*)*aInput;
-
-    char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
-
-    // convert one uchar at a time...
-    while (inLeft && outLeft) {
-      char* p = ubuf;
-      size_t n = sizeof(ubuf), one_uchar = sizeof(char16_t);
-      res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
-      if (res == (size_t)-1) {
-        NS_ERROR("conversion from utf-16 to utf-8 failed");
-        break;
-      }
-      p = ubuf;
-      n = sizeof(ubuf) - n;
-      res = xp_iconv(gUTF8ToNative, (const char**)&p, &n, aOutput, &outLeft);
-      if (res == (size_t)-1) {
-        if (errno == E2BIG) {
-          // not enough room for last uchar... back up and return.
-          in -= sizeof(char16_t);
-          res = 0;
-        } else {
-          NS_ERROR("conversion from utf-8 to native failed");
-        }
-        break;
-      }
-      inLeft -= sizeof(char16_t);
-    }
-
-    (*aInput) += (*aInputLeft - inLeft / 2);
-    *aInputLeft = inLeft / 2;
-    *aOutputLeft = outLeft;
-    if (res != (size_t)-1) {
-      return NS_OK;
-    }
-
-    // reset converters
-    xp_iconv_reset(gUnicodeToUTF8);
-    xp_iconv_reset(gUTF8ToNative);
-  }
-#endif
-
-  // fallback: truncate and hope for the best
-  // XXX This is lame and we have to do better.
-  utf16_to_isolatin1(aInput, aInputLeft, aOutput, aOutputLeft);
-
-  return NS_OK;
-}
-
-bool
-nsNativeCharsetConverter::IsNativeUTF8()
-{
-  if (!gInitialized) {
-    Lock();
-    if (!gInitialized) {
-      LazyInit();
-    }
-    Unlock();
-  }
-  return gIsNativeUTF8;
-}
-
-#endif // USE_ICONV
-
-//-----------------------------------------------------------------------------
-// conversion using mb[r]towc/wc[r]tomb
-//-----------------------------------------------------------------------------
-#if defined(USE_STDCONV)
-#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
-#include <wchar.h>    // mbrtowc, wcrtomb
-#endif
-
-class nsNativeCharsetConverter
-{
-public:
-  nsNativeCharsetConverter();
-
-  nsresult NativeToUnicode(const char** aInput, uint32_t* aInputLeft,
-                           char16_t** aOutput, uint32_t* aOutputLeft);
-  nsresult UnicodeToNative(const char16_t** aInput, uint32_t* aInputLeft,
-                           char** aOutput, uint32_t* aOutputLeft);
-
-  static void GlobalInit();
-  static void GlobalShutdown() { }
-  static bool IsNativeUTF8();
-
-private:
-  static bool gWCharIsUnicode;
-
-#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
-  mbstate_t ps;
-#endif
-};
-
-bool nsNativeCharsetConverter::gWCharIsUnicode = false;
-
-nsNativeCharsetConverter::nsNativeCharsetConverter()
-{
-#if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
-  memset(&ps, 0, sizeof(ps));
-#endif
-}
-
-void
-nsNativeCharsetConverter::GlobalInit()
-{
-  // verify that wchar_t for the current locale is actually unicode.
-  // if it is not, then we should avoid calling mbtowc/wctomb and
-  // just fallback on zero-pad/truncation conversion.
-  //
-  // this test cannot be done at build time because the encoding of
-  // wchar_t may depend on the runtime locale.  sad, but true!!
-  //
-  // so, if wchar_t is unicode then converting an ASCII character
-  // to wchar_t should not change its numeric value.  we'll just
-  // check what happens with the ASCII 'a' character.
-  //
-  // this test is not perfect... obviously, it could yield false
-  // positives, but then at least ASCII text would be converted
-  // properly (or maybe just the 'a' character) -- oh well :(
-
-  char a = 'a';
-  unsigned int w = 0;
-
-  int res = mbtowc((wchar_t*)&w, &a, 1);
-
-  gWCharIsUnicode = (res != -1 && w == 'a');
-
-#ifdef DEBUG
-  if (!gWCharIsUnicode) {
-    NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
-  }
-#endif
-}
-
-nsresult
-nsNativeCharsetConverter::NativeToUnicode(const char** aInput,
-                                          uint32_t* aInputLeft,
-                                          char16_t** aOutput,
-                                          uint32_t* aOutputLeft)
-{
-  if (gWCharIsUnicode) {
-    int incr;
-
-    // cannot use wchar_t here since it may have been redefined (e.g.,
-    // via -fshort-wchar).  hopefully, sizeof(tmp) is sufficient XP.
-    unsigned int tmp = 0;
-    while (*aInputLeft && *aOutputLeft) {
-#ifdef HAVE_MBRTOWC
-      incr = (int)mbrtowc((wchar_t*)&tmp, *aInput, *aInputLeft, &ps);
-#else
-      // XXX is this thread-safe?
-      incr = (int)mbtowc((wchar_t*)&tmp, *aInput, *aInputLeft);
-#endif
-      if (incr < 0) {
-        NS_WARNING("mbtowc failed: possible charset mismatch");
-        // zero-pad and hope for the best
-        tmp = (unsigned char)**aInput;
-        incr = 1;
-      }
-      ** aOutput = (char16_t)tmp;
-      (*aInput) += incr;
-      (*aInputLeft) -= incr;
-      (*aOutput)++;
-      (*aOutputLeft)--;
-    }
-  } else {
-    // wchar_t isn't unicode, so the best we can do is treat the
-    // input as if it is isolatin1 :(
-    isolatin1_to_utf16(aInput, aInputLeft, aOutput, aOutputLeft);
-  }
-
-  return NS_OK;
-}
-
-nsresult
-nsNativeCharsetConverter::UnicodeToNative(const char16_t** aInput,
-                                          uint32_t* aInputLeft,
-                                          char** aOutput,
-                                          uint32_t* aOutputLeft)
-{
-  if (gWCharIsUnicode) {
-    int incr;
-
-    while (*aInputLeft && *aOutputLeft >= MB_CUR_MAX) {
-#ifdef HAVE_WCRTOMB
-      incr = (int)wcrtomb(*aOutput, (wchar_t)**aInput, &ps);
-#else
-      // XXX is this thread-safe?
-      incr = (int)wctomb(*aOutput, (wchar_t)**aInput);
-#endif
-      if (incr < 0) {
-        NS_WARNING("mbtowc failed: possible charset mismatch");
-        ** aOutput = (unsigned char)**aInput; // truncate
-        incr = 1;
-      }
-      // most likely we're dead anyways if this assertion should fire
-      NS_ASSERTION(uint32_t(incr) <= *aOutputLeft, "wrote beyond end of string");
-      (*aOutput) += incr;
-      (*aOutputLeft) -= incr;
-      (*aInput)++;
-      (*aInputLeft)--;
-    }
-  } else {
-    // wchar_t isn't unicode, so the best we can do is treat the
-    // input as if it is isolatin1 :(
-    utf16_to_isolatin1(aInput, aInputLeft, aOutput, aOutputLeft);
-  }
-
-  return NS_OK;
-}
-
-// XXX : for now, return false
-bool
-nsNativeCharsetConverter::IsNativeUTF8()
-{
-  return false;
-}
-
-#endif // USE_STDCONV
-
-//-----------------------------------------------------------------------------
-// API implementation
-//-----------------------------------------------------------------------------
-
-nsresult
-NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
-{
-  aOutput.Truncate();
-
-  uint32_t inputLen = aInput.Length();
-
-  nsACString::const_iterator iter;
-  aInput.BeginReading(iter);
-
-  //
-  // OPTIMIZATION: preallocate space for largest possible result; convert
-  // directly into the result buffer to avoid intermediate buffer copy.
-  //
-  // this will generally result in a larger allocation, but that seems
-  // better than an extra buffer copy.
-  //
-  if (!aOutput.SetLength(inputLen, fallible)) {
-    return NS_ERROR_OUT_OF_MEMORY;
-  }
-  nsAString::iterator out_iter;
-  aOutput.BeginWriting(out_iter);
-
-  char16_t* result = out_iter.get();
-  uint32_t resultLeft = inputLen;
-
-  const char* buf = iter.get();
-  uint32_t bufLeft = inputLen;
-
-  nsNativeCharsetConverter conv;
-  nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
-  if (NS_SUCCEEDED(rv)) {
-    NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
-    aOutput.SetLength(inputLen - resultLeft);
-  }
-  return rv;
-}
-
-nsresult
-NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput)
-{
-  aOutput.Truncate();
-
-  nsAString::const_iterator iter, end;
-  aInput.BeginReading(iter);
-  aInput.EndReading(end);
-
-  // cannot easily avoid intermediate buffer copy.
-  char temp[4096];
-
-  nsNativeCharsetConverter conv;
-
-  const char16_t* buf = iter.get();
-  uint32_t bufLeft = Distance(iter, end);
-  while (bufLeft) {
-    char* p = temp;
-    uint32_t tempLeft = sizeof(temp);
-
-    nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
-    if (NS_FAILED(rv)) {
-      return rv;
-    }
-
-    if (tempLeft < sizeof(temp)) {
-      aOutput.Append(temp, sizeof(temp) - tempLeft);
-    }
-  }
-  return NS_OK;
-}
-
-bool
-NS_IsNativeUTF8()
-{
-  return nsNativeCharsetConverter::IsNativeUTF8();
-}
-
-void
-NS_StartupNativeCharsetUtils()
-{
-  //
-  // need to initialize the locale or else charset conversion will fail.
-  // better not delay this in case some other component alters the locale
-  // settings.
-  //
-  // XXX we assume that we are called early enough that we should
-  // always be the first to care about the locale's charset.
-  //
-  setlocale(LC_CTYPE, "");
-
-  nsNativeCharsetConverter::GlobalInit();
-}
-
-void
-NS_ShutdownNativeCharsetUtils()
-{
-  nsNativeCharsetConverter::GlobalShutdown();
-}
-
 //-----------------------------------------------------------------------------
 // XP_WIN
 //-----------------------------------------------------------------------------
-#elif defined(XP_WIN)
+#else
 
 #include <windows.h>
 #include "nsString.h"
 #include "nsAString.h"
 #include "nsReadableUtils.h"
 
 using namespace mozilla;
 
@@ -969,37 +103,9 @@ NS_CopyUnicodeToNative(const nsAString& 
     char* result = out_iter.get();
 
     ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen,
                           &defaultChar, nullptr);
   }
   return NS_OK;
 }
 
-#else
-
-#include "nsReadableUtils.h"
-
-nsresult
-NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput)
-{
-  CopyASCIItoUTF16(aInput, aOutput);
-  return NS_OK;
-}
-
-nsresult
-NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput)
-{
-  LossyCopyUTF16toASCII(aInput, aOutput);
-  return NS_OK;
-}
-
-void
-NS_StartupNativeCharsetUtils()
-{
-}
-
-void
-NS_ShutdownNativeCharsetUtils()
-{
-}
-
 #endif
--- a/xpcom/io/nsNativeCharsetUtils.h
+++ b/xpcom/io/nsNativeCharsetUtils.h
@@ -10,54 +10,43 @@
 
 /*****************************************************************************\
  *                                                                           *
  *                             **** NOTICE ****                              *
  *                                                                           *
  *             *** THESE ARE NOT GENERAL PURPOSE CONVERTERS ***              *
  *                                                                           *
  *    NS_CopyNativeToUnicode / NS_CopyUnicodeToNative should only be used    *
- *    for converting *FILENAMES* between native and unicode. They are not    *
+ *    for converting *FILENAMES* between bytes and UTF-16. They are not      *
  *    designed or tested for general encoding converter use.                 *
  *                                                                           *
+ *    On Windows, these functions convert to and from the system's legacy    *
+ *    code page, which cannot represent all of Unicode. Elsewhere, these     *
+ *    convert to and from UTF-8.                                             *
+ *                                                                           *
 \*****************************************************************************/
 
 /**
  * thread-safe conversion routines that do not depend on uconv libraries.
  */
 nsresult NS_CopyNativeToUnicode(const nsACString& aInput, nsAString& aOutput);
 nsresult NS_CopyUnicodeToNative(const nsAString& aInput, nsACString& aOutput);
 
 /*
  * This function indicates whether the character encoding used in the file
  * system (more exactly what's used for |GetNativeFoo| and |SetNativeFoo|
  * of |nsIFile|) is UTF-8 or not. Knowing that helps us avoid an
  * unnecessary encoding conversion in some cases. For instance, to get the leaf
  * name in UTF-8 out of nsIFile, we can just use |GetNativeLeafName| rather
  * than using |GetLeafName| and converting the result to UTF-8 if the file
  * system  encoding is UTF-8.
- * On Unix (but not on Mac OS X), it depends on the locale and is not known
- * in advance (at the compilation time) so that this function needs to be
- * a real function. On Mac OS X it's always UTF-8 while on Windows
- * and other platforms (e.g. OS2), it's never UTF-8.
  */
-#if defined(XP_UNIX) && !defined(XP_MACOSX) && !defined(ANDROID)
-bool NS_IsNativeUTF8();
-#else
-inline bool
+inline constexpr bool
 NS_IsNativeUTF8()
 {
-#if defined(XP_MACOSX) || defined(ANDROID)
+#ifdef XP_WIN
+  return false;
+#else
   return true;
-#else
-  return false;
 #endif
 }
-#endif
-
-
-/**
- * internal
- */
-void NS_StartupNativeCharsetUtils();
-void NS_ShutdownNativeCharsetUtils();
 
 #endif // nsNativeCharsetUtils_h__
author	Henri Sivonen <hsivonen@hsivonen.fi>
	Tue, 05 Dec 2017 13:33:52 +0200
changeset 707588	c8db529c3c4db825800be65e9609f3ee8344e36a
parent 707519	e56181d42ce2da739b668c33cd92622b5c4fa6f8
child 709006	84346795b9374a3b113e4168f37ea1bf7a435c58
child 709513	abdb267c33222b55729bd52a022f226c60af6201
push id	92169
push user	bmo:hsivonen@hsivonen.fi
push date	Tue, 05 Dec 2017 15:27:50 +0000
bugs	960957
milestone	59.0a1
xpcom/build/XPCOMInit.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/io/nsNativeCharsetUtils.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/io/nsNativeCharsetUtils.h		file \| annotate \| diff \| comparison \| revisions