author Henri Sivonen <hsivonen@hsivonen.fi>

Wed, 14 Mar 2018 20:53:18 +0200

changeset 771512 d1c1b4f2ca3cf1e1f3f173869c3f3e678c17bc8b

parent 767529 bca9200e112f6d1bb718759c142a3a5adbd6fdf3

push id 103699

push user bmo:hsivonen@hsivonen.fi

push date Fri, 23 Mar 2018 07:18:34 +0000

bugs 1445692

milestone 61.0a1

intl/uconv/moz.build file | annotate | diff | comparison | revisions

intl/uconv/nsConverterInputStream.cpp file | annotate | diff | comparison | revisions

xpcom/io/nsIConverterInputStream.idl file | annotate | diff | comparison | revisions

xpcom/io/nsUnicharInputStream.cpp file | annotate | diff | comparison | revisions
--- a/intl/uconv/moz.build
+++ b/intl/uconv/moz.build
@@ -10,17 +10,18 @@ XPIDL_SOURCES += [
     'nsIScriptableUConv.idl',
     'nsITextToSubURI.idl',
     'nsIUTF8ConverterService.idl',
 ]
 
 XPIDL_MODULE = 'uconv'
 
 EXPORTS += [
-	'nsUConvCID.h',
+    'nsConverterInputStream.h',
+    'nsUConvCID.h',
 ]
 
 UNIFIED_SOURCES += [
     'nsConverterInputStream.cpp',
     'nsConverterOutputStream.cpp',
     'nsScriptableUConv.cpp',
     'nsTextToSubURI.cpp',
     'nsUConvModule.cpp',
--- a/intl/uconv/nsConverterInputStream.cpp
+++ b/intl/uconv/nsConverterInputStream.cpp
@@ -116,16 +116,19 @@ nsConverterInputStream::ReadSegments(nsW
   nsresult rv;
   if (0 == bytesToWrite) {
     // Fill the unichar buffer
     bytesToWrite = Fill(&rv);
     if (bytesToWrite <= 0) {
       *aReadCount = 0;
       return rv;
     }
+    if (NS_FAILED(rv)) {
+      return rv;
+    }
   }
 
   if (bytesToWrite > aCount)
     bytesToWrite = aCount;
 
   uint32_t bytesWritten;
   uint32_t totalBytesWritten = 0;
 
--- a/xpcom/io/nsIConverterInputStream.idl
+++ b/xpcom/io/nsIConverterInputStream.idl
@@ -15,18 +15,24 @@ interface nsIInputStream;
 [scriptable, uuid(FC66FFB6-5404-4908-A4A3-27F92FA0579D)]
 interface nsIConverterInputStream : nsIUnicharInputStream {
     /**
      * Default replacement char value, U+FFFD REPLACEMENT CHARACTER.
      */
     const char16_t DEFAULT_REPLACEMENT_CHARACTER = 0xFFFD;
 
     /**
+     * Special replacement character value that requests errors to
+     * be treated as fatal.
+     */
+    const char16_t ERRORS_ARE_FATAL = 0;
+
+    /**
      * Initialize this stream.
-     * @param aStream 
+     * @param aStream
      *        The underlying stream to read from.
      * @param aCharset
      *        The character encoding to use for converting the bytes of the
      *        stream. A null charset will be interpreted as UTF-8.
      * @param aBufferSize
      *        How many bytes to buffer.
      * @param aReplacementChar
      *        The character to replace unknown byte sequences in the stream
--- a/xpcom/io/nsUnicharInputStream.cpp
+++ b/xpcom/io/nsUnicharInputStream.cpp
@@ -7,17 +7,17 @@
 #include "nsUnicharInputStream.h"
 #include "nsIInputStream.h"
 #include "nsIServiceManager.h"
 #include "nsString.h"
 #include "nsTArray.h"
 #include "nsAutoPtr.h"
 #include "nsCRT.h"
 #include "nsStreamUtils.h"
-#include "nsUTF8Utils.h"
+#include "nsConverterInputStream.h"
 #include "mozilla/Attributes.h"
 #include <fcntl.h>
 #if defined(XP_WIN)
 #include <io.h>
 #else
 #include <unistd.h>
 #endif
 
@@ -119,280 +119,28 @@ StringUnicharInputStream::Close()
   mPos = mLen;
   return NS_OK;
 }
 
 NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream)
 
 //----------------------------------------------------------------------
 
-class UTF8InputStream final : public nsIUnicharInputStream
-{
-public:
-  UTF8InputStream();
-  nsresult Init(nsIInputStream* aStream);
-
-  NS_DECL_ISUPPORTS
-  NS_DECL_NSIUNICHARINPUTSTREAM
-
-private:
-  ~UTF8InputStream();
-
-protected:
-  int32_t Fill(nsresult* aErrorCode);
-
-  static void CountValidUTF8Bytes(const char* aBuf, uint32_t aMaxBytes,
-                                  uint32_t& aValidUTF8bytes,
-                                  uint32_t& aValidUTF16CodeUnits);
-
-  nsCOMPtr<nsIInputStream> mInput;
-  FallibleTArray<char> mByteData;
-  FallibleTArray<char16_t> mUnicharData;
-
-  uint32_t mByteDataOffset;
-  uint32_t mUnicharDataOffset;
-  uint32_t mUnicharDataLength;
-};
-
-UTF8InputStream::UTF8InputStream() :
-  mByteDataOffset(0),
-  mUnicharDataOffset(0),
-  mUnicharDataLength(0)
-{
-}
-
-nsresult
-UTF8InputStream::Init(nsIInputStream* aStream)
-{
-  if (!mByteData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible) ||
-      !mUnicharData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible)) {
-    return NS_ERROR_OUT_OF_MEMORY;
-  }
-  mInput = aStream;
-
-  return NS_OK;
-}
-
-NS_IMPL_ISUPPORTS(UTF8InputStream, nsIUnicharInputStream)
-
-UTF8InputStream::~UTF8InputStream()
-{
-  Close();
-}
-
-nsresult
-UTF8InputStream::Close()
-{
-  mInput = nullptr;
-  mByteData.Clear();
-  mUnicharData.Clear();
-  return NS_OK;
-}
-
-nsresult
-UTF8InputStream::Read(char16_t* aBuf, uint32_t aCount, uint32_t* aReadCount)
-{
-  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
-  uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
-  nsresult errorCode;
-  if (0 == readCount) {
-    // Fill the unichar buffer
-    int32_t bytesRead = Fill(&errorCode);
-    if (bytesRead <= 0) {
-      *aReadCount = 0;
-      return errorCode;
-    }
-    readCount = bytesRead;
-  }
-  if (readCount > aCount) {
-    readCount = aCount;
-  }
-  memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
-         readCount * sizeof(char16_t));
-  mUnicharDataOffset += readCount;
-  *aReadCount = readCount;
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
-                              void* aClosure,
-                              uint32_t aCount, uint32_t* aReadCount)
-{
-  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
-  uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
-  nsresult rv = NS_OK;
-  if (0 == bytesToWrite) {
-    // Fill the unichar buffer
-    int32_t bytesRead = Fill(&rv);
-    if (bytesRead <= 0) {
-      *aReadCount = 0;
-      return rv;
-    }
-    bytesToWrite = bytesRead;
-  }
-
-  if (bytesToWrite > aCount) {
-    bytesToWrite = aCount;
-  }
-
-  uint32_t bytesWritten;
-  uint32_t totalBytesWritten = 0;
-
-  while (bytesToWrite) {
-    rv = aWriter(this, aClosure,
-                 mUnicharData.Elements() + mUnicharDataOffset,
-                 totalBytesWritten, bytesToWrite, &bytesWritten);
-
-    if (NS_FAILED(rv)) {
-      // don't propagate errors to the caller
-      break;
-    }
-
-    bytesToWrite -= bytesWritten;
-    totalBytesWritten += bytesWritten;
-    mUnicharDataOffset += bytesWritten;
-  }
-
-  *aReadCount = totalBytesWritten;
-
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString,
-                            uint32_t* aReadCount)
-{
-  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
-  uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
-  nsresult errorCode;
-  if (0 == readCount) {
-    // Fill the unichar buffer
-    int32_t bytesRead = Fill(&errorCode);
-    if (bytesRead <= 0) {
-      *aReadCount = 0;
-      return errorCode;
-    }
-    readCount = bytesRead;
-  }
-  if (readCount > aCount) {
-    readCount = aCount;
-  }
-  const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
-  aString.Assign(buf, readCount);
-
-  mUnicharDataOffset += readCount;
-  *aReadCount = readCount;
-  return NS_OK;
-}
-
-int32_t
-UTF8InputStream::Fill(nsresult* aErrorCode)
-{
-  if (!mInput) {
-    // We already closed the stream!
-    *aErrorCode = NS_BASE_STREAM_CLOSED;
-    return -1;
-  }
-
-  NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness");
-  uint32_t remainder = mByteData.Length() - mByteDataOffset;
-  mByteDataOffset = remainder;
-  uint32_t nb;
-  *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb);
-  if (nb == 0) {
-    // Because we assume a many to one conversion, the lingering data
-    // in the byte buffer must be a partial conversion
-    // fragment. Because we know that we have received no more new
-    // data to add to it, we can't convert it. Therefore, we discard
-    // it.
-    return nb;
-  }
-  NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb");
-
-  // Now convert as much of the byte buffer to unicode as possible
-  uint32_t srcLen, dstLen;
-  CountValidUTF8Bytes(mByteData.Elements(), remainder + nb, srcLen, dstLen);
-
-  // the number of UCS2 characters should always be <= the number of
-  // UTF8 chars
-  NS_ASSERTION(remainder + nb >= srcLen, "cannot be longer than out buffer");
-  NS_ASSERTION(dstLen <= mUnicharData.Capacity(),
-               "Ouch. I would overflow my buffer if I wasn't so careful.");
-  if (dstLen > mUnicharData.Capacity()) {
-    return 0;
-  }
-
-  ConvertUTF8toUTF16 converter(mUnicharData.Elements());
-
-  nsACString::const_char_iterator start = mByteData.Elements();
-  nsACString::const_char_iterator end = mByteData.Elements() + srcLen;
-
-  copy_string(start, end, converter);
-  if (converter.Length() != dstLen) {
-    *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION;
-    return -1;
-  }
-
-  mUnicharDataOffset = 0;
-  mUnicharDataLength = dstLen;
-  mByteDataOffset = srcLen;
-
-  return dstLen;
-}
-
-void
-UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, uint32_t aMaxBytes,
-                                     uint32_t& aValidUTF8bytes,
-                                     uint32_t& aValidUTF16CodeUnits)
-{
-  const char* c = aBuffer;
-  const char* end = aBuffer + aMaxBytes;
-  const char* lastchar = c;  // pre-initialize in case of 0-length buffer
-  uint32_t utf16length = 0;
-  while (c < end && *c) {
-    lastchar = c;
-    utf16length++;
-
-    if (UTF8traits::isASCII(*c)) {
-      c++;
-    } else if (UTF8traits::is2byte(*c)) {
-      c += 2;
-    } else if (UTF8traits::is3byte(*c)) {
-      c += 3;
-    } else if (UTF8traits::is4byte(*c)) {
-      c += 4;
-      utf16length++; // add 1 more because this will be converted to a
-      // surrogate pair.
-    } else if (UTF8traits::is5byte(*c)) {
-      c += 5;
-    } else if (UTF8traits::is6byte(*c)) {
-      c += 6;
-    } else {
-      NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
-      break; // Otherwise we go into an infinite loop.  But what happens now?
-    }
-  }
-  if (c > end) {
-    c = lastchar;
-    utf16length--;
-  }
-
-  aValidUTF8bytes = c - aBuffer;
-  aValidUTF16CodeUnits = utf16length;
-}
-
 nsresult
 NS_NewUnicharInputStream(nsIInputStream* aStreamToWrap,
                          nsIUnicharInputStream** aResult)
 {
   *aResult = nullptr;
 
   // Create converter input stream
-  RefPtr<UTF8InputStream> it = new UTF8InputStream();
-  nsresult rv = it->Init(aStreamToWrap);
+  RefPtr<nsConverterInputStream> it = new nsConverterInputStream();
+  nsresult rv =
+    it->Init(aStreamToWrap,
+             "UTF-8",
+             STRING_BUFFER_SIZE,
+             nsIConverterInputStream::ERRORS_ARE_FATAL);
   if (NS_FAILED(rv)) {
     return rv;
   }
 
   it.forget(aResult);
   return NS_OK;
 }
author	Henri Sivonen <hsivonen@hsivonen.fi>
	Wed, 14 Mar 2018 20:53:18 +0200
changeset 771512	d1c1b4f2ca3cf1e1f3f173869c3f3e678c17bc8b
parent 767529	bca9200e112f6d1bb718759c142a3a5adbd6fdf3
push id	103699
push user	bmo:hsivonen@hsivonen.fi
push date	Fri, 23 Mar 2018 07:18:34 +0000
bugs	1445692
milestone	61.0a1
intl/uconv/moz.build		file | annotate | diff | comparison | revisions
intl/uconv/nsConverterInputStream.cpp		file | annotate | diff | comparison | revisions
xpcom/io/nsIConverterInputStream.idl		file | annotate | diff | comparison | revisions
xpcom/io/nsUnicharInputStream.cpp		file | annotate | diff | comparison | revisions