Bug 1225255 - Encode URL query string segments to bytes with HTML numeric character references for unmappable characters. draft
author Henri Sivonen <hsivonen@hsivonen.fi>
Fri, 15 Jul 2016 14:49:22 +0300
changeset 444147 147c0e75306fc266de7d0b12e10b5c619a1b716e
parent 443568 9f6533d92b9610ae05f57b561b0c0ef56dd21d7e
child 538241 8c6cdb9eac1fbb0df72204947275ab3f338c2e1d
push id 37204
push user hsivonen@mozilla.com
push date Sat, 26 Nov 2016 14:20:53 +0000
bugs 1225255
milestone 53.0a1
Bug 1225255 - Encode URL query string segments to bytes with HTML numeric character references for unmappable characters. MozReview-Commit-ID: HDJxSSZZYlw
netwerk/base/nsStandardURL.cpp
netwerk/base/nsStandardURL.h
testing/web-platform/tests/html/infrastructure/urls/resolving-urls/query-encoding/resources/resolve-url.js
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@@ -150,62 +150,16 @@ char nsStandardURL::gHostLimitDigits[] =
   PR_BEGIN_MACRO \
     if (!mMutable) { \
         NS_WARNING("attempt to modify an immutable nsStandardURL"); \
         return NS_ERROR_ABORT; \
     } \
   PR_END_MACRO
 
 //----------------------------------------------------------------------------
-
-static nsresult
-EncodeString(nsIUnicodeEncoder *encoder, const nsAFlatString &str, nsACString &result)
-{
-    nsresult rv;
-    int32_t len = str.Length();
-    int32_t maxlen;
-
-    rv = encoder->GetMaxLength(str.get(), len, &maxlen);
-    if (NS_FAILED(rv))
-        return rv;
-
-    char buf[256], *p = buf;
-    if (uint32_t(maxlen) > sizeof(buf) - 1) {
-        p = (char *) malloc(maxlen + 1);
-        if (!p)
-            return NS_ERROR_OUT_OF_MEMORY;
-    }
-
-    rv = encoder->Convert(str.get(), &len, p, &maxlen);
-    if (NS_FAILED(rv))
-        goto end;
-    if (rv == NS_ERROR_UENC_NOMAPPING) {
-        NS_WARNING("unicode conversion failed");
-        rv = NS_ERROR_UNEXPECTED;
-        goto end;
-    }
-    p[maxlen] = 0;
-    result.Assign(p);
-
-    len = sizeof(buf) - 1;
-    rv = encoder->Finish(buf, &len);
-    if (NS_FAILED(rv))
-        goto end;
-    buf[len] = 0;
-    result.Append(buf);
-
-end:
-    encoder->Reset();
-
-    if (p != buf)
-        free(p);
-    return rv;
-}
-
-//----------------------------------------------------------------------------
 // nsStandardURL::nsPrefObserver
 //----------------------------------------------------------------------------
 
 #define NS_NET_PREF_ESCAPEUTF8         "network.standard-url.escape-utf8"
 #define NS_NET_PREF_ALWAYSENCODEINUTF8 "network.standard-url.encode-utf8"
 #define NS_NET_PREF_ENABLE_RUST        "network.standard-url.enable-rust"
 
 NS_IMPL_ISUPPORTS(nsStandardURL::nsPrefObserver, nsIObserver)
@@ -257,17 +211,17 @@ nsSegmentEncoder::EncodeSegmentCount(con
         // only do this if the segment is non-ASCII.  Further, if mCharset is
         // null or the empty string then the origin charset is UTF-8 and there
         // is nothing to do.
         nsAutoCString encBuf;
         if (mCharset && *mCharset && !nsCRT::IsAscii(str + pos, len)) {
             // we have to encode this segment
             if (mEncoder || InitUnicodeEncoder()) {
                 NS_ConvertUTF8toUTF16 ucsBuf(Substring(str + pos, str + pos + len));
-                if (NS_SUCCEEDED(EncodeString(mEncoder, ucsBuf, encBuf))) {
+                if (mEncoder->Encode(ucsBuf, encBuf)) {
                     str = encBuf.get();
                     pos = 0;
                     len = encBuf.Length();
                 }
                 // else some failure occurred... assume UTF-8 is ok.
             }
         }
 
@@ -306,24 +260,24 @@ nsSegmentEncoder::EncodeSegment(const ns
 
 bool nsStandardURL::
 nsSegmentEncoder::InitUnicodeEncoder()
 {
     NS_ASSERTION(!mEncoder, "Don't call this if we have an encoder already!");
     // "replacement" won't survive another label resolution
     nsDependentCString label(mCharset);
     if (label.EqualsLiteral("replacement")) {
-      mEncoder = EncodingUtils::EncoderForEncoding(label);
-      return true;
+      // Returning false here causes the caller to use UTF-8.
+      return false;
     }
     nsAutoCString encoding;
     if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
       return false;
     }
-    mEncoder = EncodingUtils::EncoderForEncoding(encoding);
+    mEncoder = MakeUnique<nsNCRFallbackEncoderWrapper>(encoding);
     return true;
 }
 
 #define GET_SEGMENT_ENCODER_INTERNAL(name, useUTF8) \
     nsSegmentEncoder name(useUTF8 ? nullptr : mOriginCharset.get())
 
 #define GET_SEGMENT_ENCODER(name) \
     GET_SEGMENT_ENCODER_INTERNAL(name, gAlwaysEncodeInUTF8)
--- a/netwerk/base/nsStandardURL.h
+++ b/netwerk/base/nsStandardURL.h
@@ -5,17 +5,17 @@
 
 #ifndef nsStandardURL_h__
 #define nsStandardURL_h__
 
 #include "nsString.h"
 #include "nsISerializable.h"
 #include "nsIFileURL.h"
 #include "nsIStandardURL.h"
-#include "nsIUnicodeEncoder.h"
+#include "nsNCRFallbackEncoderWrapper.h"
 #include "nsIObserver.h"
 #include "nsCOMPtr.h"
 #include "nsURLHelper.h"
 #include "nsIClassInfo.h"
 #include "nsISizeOf.h"
 #include "prclist.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/MemoryReporting.h"
@@ -138,17 +138,17 @@ public: /* internal -- HPUX compiler can
         const nsACString &EncodeSegment(const nsASingleFragmentCString &str,
                                         int16_t mask,
                                         nsAFlatCString &buf);
     private:
         bool InitUnicodeEncoder();
         
         const char* mCharset;  // Caller should keep this alive for
                                // the life of the segment encoder
-        nsCOMPtr<nsIUnicodeEncoder> mEncoder;
+        mozilla::UniquePtr<nsNCRFallbackEncoderWrapper> mEncoder;
     };
     friend class nsSegmentEncoder;
 
 protected:
     // enum used in a few places to specify how .ref attribute should be handled
     enum RefHandlingEnum {
         eIgnoreRef,
         eHonorRef,
--- a/testing/web-platform/tests/html/infrastructure/urls/resolving-urls/query-encoding/resources/resolve-url.js
+++ b/testing/web-platform/tests/html/infrastructure/urls/resolving-urls/query-encoding/resources/resolve-url.js
@@ -9,17 +9,17 @@ onload = function() {
   var blank = 'resources/blank.py?encoding=' + encoding;
   var stash_put = 'resources/stash.py?q=\u00E5&action=put&id=';
   var stash_take = 'resources/stash.py?action=take&id=';
   var expected_obj = {
     'utf-8':'%C3%A5',
     'utf-16be':'%C3%A5',
     'utf-16le':'%C3%A5',
     'windows-1252':'%E5',
-    'windows-1251':'%3F'
+    'windows-1251':'&%23229;'
   };
   var expected_current = expected_obj[encoding];
   var expected_utf8 = expected_obj['utf-8'];
 
   function msg(expected, got) {
     return 'expected substring '+expected+' got '+got;
   }