Bug 1369317 - Filter and escape URI string in only one pass r=mcmanus draft
author Valentin Gosu <valentin.gosu@gmail.com>
Sun, 13 Aug 2017 10:03:34 +0200
changeset 645529 4995b4d8a133b8568af5b130be2077ee90f8b4e4
parent 645528 bc1dfc585066e09ece9511e6751cf36d33b79bef
child 725916 fff48c7447a7c1a640cd0103611e040bdcdc5540
push id 73775
push user valentin.gosu@gmail.com
push date Sun, 13 Aug 2017 08:03:58 +0000
reviewers mcmanus
bugs 1369317
milestone 57.0a1
Bug 1369317 - Filter and escape URI string in only one pass r=mcmanus MozReview-Commit-ID: KU4C4cS3jZC
netwerk/base/nsSimpleURI.cpp
netwerk/base/nsURLHelper.cpp
netwerk/base/nsURLHelper.h
xpcom/io/nsEscape.cpp
xpcom/io/nsEscape.h
--- a/netwerk/base/nsSimpleURI.cpp
+++ b/netwerk/base/nsSimpleURI.cpp
@@ -292,25 +292,20 @@ nsSimpleURI::SetSpec(const nsACString &a
     NS_ENSURE_STATE(mMutable);
 
     nsresult rv = net_ExtractURLScheme(aSpec, mScheme);
     if (NS_FAILED(rv)) {
         return rv;
     }
     ToLowerCase(mScheme);
 
-    // filter out unexpected chars "\r\n\t" if necessary
-    nsAutoCString filteredSpec;
-    net_FilterURIString(aSpec, filteredSpec);
-
-    // nsSimpleURI currently restricts the charset to US-ASCII
     nsAutoCString spec;
-    rv = NS_EscapeURL(filteredSpec, esc_OnlyNonASCII, spec, fallible);
+    rv = net_FilterAndEscapeURI(aSpec, esc_OnlyNonASCII, spec);
     if (NS_FAILED(rv)) {
-      return rv;
+        return rv;
     }
 
     int32_t colonPos = spec.FindChar(':');
     MOZ_ASSERT(colonPos != kNotFound, "A colon should be in this string");
     // This sets mPath, mQuery and mRef.
     return SetPathQueryRefEscaped(Substring(spec, colonPos + 1),
                                   /* needsEscape = */ false);
 }
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -14,16 +14,17 @@
 #include "nsIFile.h"
 #include "nsIURLParser.h"
 #include "nsCOMPtr.h"
 #include "nsCRT.h"
 #include "nsNetCID.h"
 #include "mozilla/Preferences.h"
 #include "prnetdb.h"
 #include "mozilla/Tokenizer.h"
+#include "nsEscape.h"
 
 using namespace mozilla;
 
 //----------------------------------------------------------------------------
 // Init/Shutdown
 //----------------------------------------------------------------------------
 
 static bool gInitialized = false;
@@ -623,16 +624,36 @@ net_FilterURIString(const nsACString& in
     }
 
     result.Assign(Substring(newStart, newEnd));
     if (needsStrip) {
         result.StripTaggedASCII(mask);
     }
 }
 
+nsresult
+net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult)
+{
+    aResult.Truncate();
+
+    auto start = aInput.BeginReading();
+    auto end = aInput.EndReading();
+
+    // Trim off leading and trailing invalid chars.
+    auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
+    auto newStart = std::find_if(start, end, charFilter);
+    auto newEnd = std::find_if(
+        std::reverse_iterator<decltype(end)>(end),
+        std::reverse_iterator<decltype(newStart)>(newStart),
+        charFilter).base();
+
+    const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab();
+    return NS_EscapeAndFilterURL(Substring(newStart, newEnd), aFlags,
+                                 &mask, aResult, fallible);
+}
 
 #if defined(XP_WIN)
 bool
 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
 {
     bool writing = false;
 
     nsACString::const_iterator beginIter, endIter;
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@@ -110,16 +110,28 @@ inline bool net_IsValidScheme(const nsCS
  * it safe to call on things like javascript: urls or data: urls, where we may
  * in fact run into whitespace that is not properly encoded.
  *
  * @param input the URL spec we want to filter
  * @param result the out param to write to if filtering happens
  */
 void net_FilterURIString(const nsACString& input, nsACString& result);
 
+/**
+ * This function performs character stripping just like net_FilterURIString,
+ * with the added benefit of also performing percent escaping of disallowed
+ * characters, all in one pass. Saving one pass is very important when operating
+ * on really large strings.
+ *
+ * @param aInput the URL spec we want to filter
+ * @param aFlags the flags which control which characters we escape
+ * @param aResult the out param that receives the filtered and escaped spec
+ */
+nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult);
+
 #if defined(XP_WIN)
 /**
  * On Win32 and OS/2 system's a back-slash in a file:// URL is equivalent to a
  * forward-slash.  This function maps any back-slashes to forward-slashes.
  *
  * @param aURL
  *        The URL string to normalize (UTF-8 encoded).  This can be a
  *        relative URL segment.
--- a/xpcom/io/nsEscape.cpp
+++ b/xpcom/io/nsEscape.cpp
@@ -6,16 +6,17 @@
 
 #include "nsEscape.h"
 
 #include "mozilla/ArrayUtils.h"
 #include "mozilla/BinarySearch.h"
 #include "nsTArray.h"
 #include "nsCRT.h"
 #include "plstr.h"
+#include "nsASCIIMask.h"
 
 static const char hexCharsUpper[] = "0123456789ABCDEF";
 static const char hexCharsUpperLower[] = "0123456789ABCDEFabcdef";
 
 static const int netCharType[256] =
 /*  Bit 0       xalpha      -- the alphas
 **  Bit 1       xpalpha     -- as xalpha but
 **                             converts spaces to plus and plus to %2B
@@ -377,17 +378,18 @@ static uint16_t dontNeedEscape(uint16_t 
  * @param aResult String that has the URL escaped portion appended to. Only
  *  altered if the string is URL escaped or |esc_AlwaysCopy| is specified.
  * @param aDidAppend Indicates whether or not data was appended to |aResult|.
  * @return NS_ERROR_INVALID_ARG, NS_ERROR_OUT_OF_MEMORY on failure.
  */
 template<class T>
 static nsresult
 T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
-            uint32_t aFlags, T& aResult, bool& aDidAppend)
+            uint32_t aFlags, const ASCIIMaskArray* aFilterMask,
+            T& aResult, bool& aDidAppend)
 {
   typedef nsCharTraits<typename T::char_type> traits;
   typedef typename traits::unsigned_char_type unsigned_char_type;
   static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
                 "unexpected char type");
 
   if (!aPart) {
     NS_NOTREACHED("null pointer");
@@ -404,16 +406,29 @@ T_EscapeURL(const typename T::char_type*
 
   typename T::char_type tempBuffer[100];
   unsigned int tempBufferPos = 0;
 
   bool previousIsNonASCII = false;
   for (size_t i = 0; i < aPartLen; ++i) {
     unsigned_char_type c = *src++;
 
+    // If there is a filter, we wish to skip any characters which match it.
+    // This is needed so we don't perform an extra pass just to extract the
+    // filtered characters.
+    if (aFilterMask && ASCIIMask::IsMasked(*aFilterMask, c)) {
+      if (!writing) {
+        if (!aResult.Append(aPart, i, fallible)) {
+          return NS_ERROR_OUT_OF_MEMORY;
+        }
+        writing = true;
+      }
+      continue;
+    }
+
     // if the char has not to be escaped or whatever follows % is
     // a valid escaped string, just copy the char.
     //
     // Also the % will not be escaped until forced
     // See bugzilla bug 61269 for details why we changed this
     //
     // And, we will not escape non-ascii characters if requested.
     // On special request we will also escape the colon even when
@@ -469,47 +484,66 @@ bool
 NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
              nsACString& aResult)
 {
   if (aPartLen < 0) {
     aPartLen = strlen(aPart);
   }
 
   bool result = false;
-  nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, aResult, result);
+  nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, nullptr, aResult, result);
   if (NS_FAILED(rv)) {
     ::NS_ABORT_OOM(aResult.Length() * sizeof(nsACString::char_type));
   }
 
   return result;
 }
 
 nsresult
 NS_EscapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult,
              const mozilla::fallible_t&)
 {
   bool appended = false;
-  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult, appended);
+  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, nullptr, aResult, appended);
+  if (NS_FAILED(rv)) {
+    aResult.Truncate();
+    return rv;
+  }
+
+  if (!appended) {
+    aResult = aStr;
+  }
+
+  return rv;
+}
+
+nsresult
+NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
+                      const ASCIIMaskArray* aFilterMask,
+                      nsACString& aResult, const mozilla::fallible_t&)
+{
+  bool appended = false;
+  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aFilterMask, aResult, appended);
   if (NS_FAILED(rv)) {
     aResult.Truncate();
     return rv;
   }
 
   if (!appended) {
     aResult = aStr;
   }
 
   return rv;
 }
 
 const nsAString&
 NS_EscapeURL(const nsAString& aStr, uint32_t aFlags, nsAString& aResult)
 {
   bool result = false;
-  nsresult rv = T_EscapeURL<nsAString>(aStr.Data(), aStr.Length(), aFlags, aResult, result);
+  nsresult rv = T_EscapeURL<nsAString>(aStr.Data(), aStr.Length(), aFlags, nullptr, aResult, result);
 
   if (NS_FAILED(rv)) {
     ::NS_ABORT_OOM(aResult.Length() * sizeof(nsAString::char_type));
   }
 
   if (result) {
     return aResult;
   }
--- a/xpcom/io/nsEscape.h
+++ b/xpcom/io/nsEscape.h
@@ -164,16 +164,29 @@ NS_EscapeURL(const nsACString& aStr, uin
 /**
  * Fallible version of NS_EscapeURL. On success aResult will point to either
  * the original string or an escaped copy.
  */
 nsresult
 NS_EscapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult,
              const mozilla::fallible_t&);
 
+// Forward declaration for nsASCIIMask.h
+typedef std::array<bool, 128> ASCIIMaskArray;
+
+/**
+ * The same as NS_EscapeURL, except it also filters out characters that match
+ * aFilterMask.
+ */
+nsresult
+NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
+                      const ASCIIMaskArray* aFilterMask,
+                      nsACString& aResult, const mozilla::fallible_t&);
+
+
 inline const nsACString&
 NS_UnescapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult)
 {
   if (NS_UnescapeURL(aStr.Data(), aStr.Length(), aFlags, aResult)) {
     return aResult;
   }
   return aStr;
 }