Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab draft
authorValentin Gosu <valentin.gosu@gmail.com>
Fri, 29 Jan 2016 16:25:24 +0100
changeset 327082 776002cfda394c1f953e64f9da22ac4644be3282
parent 327081 b998e916a040cd36ef64161747c7cc1e5ef87d1d
child 513651 e4d2cf104ff9069c783f7cf35e5ffce7776d45fd
push id10188
push uservalentin.gosu@gmail.com
push dateFri, 29 Jan 2016 15:26:10 +0000
reviewershonzab
bugs241788
milestone46.0a1
Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab * net_ExtractURLScheme now uses mozilla::Tokenizer * net_FilterURIString also filters characters in the scheme now * removed startPos and endPos parameters for net_FilterURIString and introduced net_IsAbsoluteURL
netwerk/base/nsIOService.cpp
netwerk/base/nsStandardURL.cpp
netwerk/base/nsURLHelper.cpp
netwerk/base/nsURLHelper.h
netwerk/protocol/http/Http2Stream.cpp
netwerk/protocol/res/SubstitutingProtocolHandler.cpp
netwerk/streamconv/converters/nsIndexedToHTML.cpp
netwerk/test/unit/test_standardurl.js
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@@ -559,17 +559,17 @@ nsIOService::GetProtocolHandler(const ch
         return NS_ERROR_UNKNOWN_PROTOCOL;
 
     return rv;
 }
 
 NS_IMETHODIMP
 nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
 {
-    return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
+    return net_ExtractURLScheme(inURI, scheme);
 }
 
 NS_IMETHODIMP 
 nsIOService::GetProtocolFlags(const char* scheme, uint32_t *flags)
 {
     nsCOMPtr<nsIProtocolHandler> handler;
     nsresult rv = GetProtocolHandler(scheme, getter_AddRefs(handler));
     if (NS_FAILED(rv)) return rv;
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@@ -2828,30 +2828,18 @@ nsStandardURL::Init(uint32_t urlType,
             IsUTFCharset(mOriginCharset.get())) {
             mOriginCharset.Truncate();
         }
     }
     else if (!IsUTFCharset(charset)) {
         mOriginCharset = charset;
     }
 
-    if (baseURI) {
-        uint32_t start, end;
-        // pull out the scheme and where it ends
-        nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
-        if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
-            nsACString::const_iterator slash;
-            spec.BeginReading(slash);
-            slash.advance(end+1);
-            // then check if // follows
-            // if it follows, aSpec is really absolute ... 
-            // ignore aBaseURI in this case
-            if (*slash == '/' && *(++slash) == '/')
-                baseURI = nullptr;
-        }
+    if (baseURI && net_IsAbsoluteURL(spec)) {
+        baseURI = nullptr;
     }
 
     if (!baseURI)
         return SetSpec(spec);
 
     nsAutoCString buf;
     nsresult rv = baseURI->Resolve(spec, buf);
     if (NS_FAILED(rv)) return rv;
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -9,16 +9,17 @@
 #include "nsURLHelper.h"
 #include "nsIFile.h"
 #include "nsIURLParser.h"
 #include "nsCOMPtr.h"
 #include "nsCRT.h"
 #include "nsNetCID.h"
 #include "mozilla/Preferences.h"
 #include "prnetdb.h"
+#include "mozilla/Tokenizer.h"
 
 using namespace mozilla;
 
 //----------------------------------------------------------------------------
 // Init/Shutdown
 //----------------------------------------------------------------------------
 
 static bool gInitialized = false;
@@ -174,22 +175,22 @@ net_ParseFileURL(const nsACString &inURL
     }
 
     outDirectory.Truncate();
     outFileBaseName.Truncate();
     outFileExtension.Truncate();
 
     const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
     const char *url = flatURL.get();
-    
-    uint32_t schemeBeg, schemeEnd;
-    rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
+
+    nsAutoCString scheme;
+    rv = net_ExtractURLScheme(flatURL, scheme);
     if (NS_FAILED(rv)) return rv;
 
-    if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
+    if (!scheme.EqualsLiteral("file")) {
         NS_ERROR("must be a file:// url");
         return NS_ERROR_UNEXPECTED;
     }
 
     nsIURLParser *parser = net_GetNoAuthURLParser();
     NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
 
     uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
@@ -478,67 +479,62 @@ net_ResolveRelativePath(const nsACString
     result = path;
     return NS_OK;
 }
 
 //----------------------------------------------------------------------------
 // scheme fu
 //----------------------------------------------------------------------------
 
+#if !defined(MOZILLA_XPCOMRT_API)
+static bool isAsciiAlpha(char c) {
+    return nsCRT::IsAsciiAlpha(c);
+}
+
+static bool
+net_IsValidSchemeChar(const char aChar)
+{
+    if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
+        aChar == '+' || aChar == '.' || aChar == '-') {
+        return true;
+    }
+    return false;
+}
+#endif
+
 /* Extract URI-Scheme if possible */
 nsresult
 net_ExtractURLScheme(const nsACString &inURI,
-                     uint32_t *startPos, 
-                     uint32_t *endPos,
-                     nsACString *scheme)
+                     nsACString& scheme)
 {
-    // search for something up to a colon, and call it the scheme
-    const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
-    const char* uri_start = flatURI.get();
-    const char* uri = uri_start;
+#if !defined(MOZILLA_XPCOMRT_API)
+    Tokenizer p(inURI, "\r\n\t");
 
-    if (!uri)
-        return NS_ERROR_MALFORMED_URI;
-
-    // skip leading white space
-    while (nsCRT::IsAsciiSpace(*uri))
-        uri++;
-
-    uint32_t start = uri - uri_start;
-    if (startPos) {
-        *startPos = start;
+    while (p.CheckWhite() || p.CheckChar(' ')) {
+        // Skip leading whitespace
     }
 
-    uint32_t length = 0;
-    char c;
-    while ((c = *uri++) != '\0') {
-        // First char must be Alpha
-        if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
-            length++;
-        } 
-        // Next chars can be alpha + digit + some special chars
-        else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
-                 nsCRT::IsAsciiDigit(c) || c == '+' || 
-                 c == '.' || c == '-')) {
-            length++;
-        }
-        // stop if colon reached but not as first char
-        else if (c == ':' && length > 0) {
-            if (endPos) {
-                *endPos = start + length;
-            }
+    p.Record();
+    if (!p.CheckChar(isAsciiAlpha)) {
+        // First char must be alpha
+        return NS_ERROR_MALFORMED_URI;
+    }
 
-            if (scheme)
-                scheme->Assign(Substring(inURI, start, length));
-            return NS_OK;
-        }
-        else 
-            break;
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
     }
-    return NS_ERROR_MALFORMED_URI;
+
+    if (!p.CheckChar(':')) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    p.Claim(scheme);
+    scheme.StripChars("\r\n\t");
+#endif
+    return NS_OK;
 }
 
 bool
 net_IsValidScheme(const char *scheme, uint32_t schemeLen)
 {
     // first char must be alpha
     if (!nsCRT::IsAsciiAlpha(*scheme))
         return false;
@@ -552,96 +548,82 @@ net_IsValidScheme(const char *scheme, ui
               *scheme == '-'))
             return false;
     }
 
     return true;
 }
 
 bool
+net_IsAbsoluteURL(const nsACString& uri)
+{
+#if !defined(MOZILLA_XPCOMRT_API)
+    Tokenizer p(uri, "\r\n\t");
+
+    while (p.CheckWhite() || p.CheckChar(' ')) {
+        // Skip leading whitespace
+    }
+
+    // First char must be alpha
+    if (!p.CheckChar(isAsciiAlpha)) {
+        return false;
+    }
+
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
+    }
+    if (!p.CheckChar(':')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (!p.CheckChar('/')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (p.CheckChar('/')) {
+        // aSpec is really absolute. Ignore aBaseURI in this case
+        return true;
+    }
+#endif
+    return false;
+}
+
+bool
 net_FilterURIString(const char *str, nsACString& result)
 {
     NS_PRECONDITION(str, "Must have a non-null string!");
-    bool writing = false;
     result.Truncate();
     const char *p = str;
 
-    // Remove leading spaces, tabs, CR, LF if any.
-    while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
-        writing = true;
-        str = p + 1;
+    // Figure out if we need to filter anything.
+    bool writing = false;
+    while (*p) {
+        if (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
+            writing = true;
+            break;
+        }
         p++;
     }
 
-    // Don't strip from the scheme, because other code assumes everything
-    // up to the ':' is the scheme, and it's bad not to have it match.
-    // If there's no ':', strip.
-    bool found_colon = false;
-    const char *first = nullptr;
-    while (*p) {
-        switch (*p) {
-            case '\t': 
-            case '\r': 
-            case '\n':
-                if (found_colon) {
-                    writing = true;
-                    // append chars up to but not including *p
-                    if (p > str)
-                        result.Append(str, p - str);
-                    str = p + 1;
-                } else {
-                    // remember where the first \t\r\n was in case we find no scheme
-                    if (!first)
-                        first = p;
-                }
-                break;
-
-            case ':':
-                found_colon = true;
-                break;
-
-            case '/':
-            case '@':
-                if (!found_colon) {
-                    // colon also has to precede / or @ to be a scheme
-                    found_colon = true; // not really, but means ok to strip
-                    if (first) {
-                        // go back and replace
-                        p = first;
-                        continue; // process *p again
-                    }
-                }
-                break;
-
-            default:
-                break;
-        }
-        p++;
-
-        // At end, if there was no scheme, and we hit a control char, fix
-        // it up now.
-        if (!*p && first != nullptr && !found_colon) {
-            // TRICKY - to avoid duplicating code, we reset the loop back
-            // to the point we found something to do
-            p = first;
-            // This also stops us from looping after we finish
-            found_colon = true; // so we'll replace \t\r\n
-        }
+    if (!writing) {
+        // Nothing to strip or filter
+        return false;
     }
 
-    // Remove trailing spaces if any
-    while (((p-1) >= str) && (*(p-1) == ' ')) {
-        writing = true;
-        p--;
-    }
+    nsAutoCString temp;
 
-    if (writing && p > str)
-        result.Append(str, p - str);
+    temp.Assign(str);
+    temp.Trim("\r\n\t ");
+    temp.StripChars("\r\n\t");
 
-    return writing;
+    result.Assign(temp);
+
+    return true;
 }
 
 #if defined(XP_WIN)
 bool
 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
 {
     bool writing = false;
 
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@@ -75,27 +75,31 @@ void net_CoalesceDirs(netCoalesceFlags f
  *
  * @return a new string, representing canonical uri
  */
 nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                              const nsACString &basePath,
                                              nsACString &result);
 
 /**
+ * Check if a URL is absolute
+ *
+ * @param inURL     URL spec
+ * @return true if the given spec represents an absolute URL
+ */
+bool net_IsAbsoluteURL(const nsACString& inURL);
+
+/**
  * Extract URI-Scheme if possible
  *
  * @param inURI     URI spec
- * @param startPos  start of scheme (may be null)
- * @param endPos    end of scheme; index of colon (may be null)
  * @param scheme    scheme copied to this buffer on return (may be null)
  */
 nsresult net_ExtractURLScheme(const nsACString &inURI,
-                                          uint32_t *startPos, 
-                                          uint32_t *endPos,
-                                          nsACString *scheme = nullptr);
+                              nsACString &scheme);
 
 /* check that the given scheme conforms to RFC 2396 */
 bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
 
 inline bool net_IsValidScheme(const nsAFlatCString &scheme)
 {
     return net_IsValidScheme(scheme.get(), scheme.Length());
 }
@@ -104,18 +108,17 @@ inline bool net_IsValidScheme(const nsAF
  * Filter out whitespace from a URI string.  The input is the |str|
  * pointer. |result| is written to if and only if there is whitespace that has
  * to be filtered out.  The return value is true if and only if |result| is
  * written to.
  *
  * This function strips out all whitespace at the beginning and end of the URL
  * and strips out \r, \n, \t from the middle of the URL.  This makes it safe to
  * call on things like javascript: urls or data: urls, where we may in fact run
- * into whitespace that is not properly encoded.  Note that stripping does not
- * occur in the scheme portion itself.
+ * into whitespace that is not properly encoded.
  *
  * @param str the pointer to the string to filter.  Must be non-null.
  * @param result the out param to write to if filtering happens
  * @return whether result was written to
  */
 bool net_FilterURIString(const char *str, nsACString& result);
 
 #if defined(XP_WIN)
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@@ -339,17 +339,17 @@ Http2Stream::WriteSegments(nsAHttpSegmen
   mSegmentWriter = nullptr;
   return rv;
 }
 
 nsresult
 Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
 {
   nsAutoCString scheme;
-  nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
+  nsresult rv = net_ExtractURLScheme(origin, scheme);
   NS_ENSURE_SUCCESS(rv, rv);
   return MakeOriginURL(scheme, origin, url);
 }
 
 nsresult
 Http2Stream::MakeOriginURL(const nsACString &scheme, const nsACString &origin,
                            RefPtr<nsStandardURL> &url)
 {
--- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
+++ b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
@@ -49,17 +49,17 @@ SubstitutingURL::EnsureFile()
   MOZ_ASSERT(substHandler);
 
   nsAutoCString spec;
   rv = substHandler->ResolveURI(this, spec);
   if (NS_FAILED(rv))
     return rv;
 
   nsAutoCString scheme;
-  rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
+  rv = net_ExtractURLScheme(spec, scheme);
   if (NS_FAILED(rv))
     return rv;
 
   // Bug 585869:
   // In most cases, the scheme is jar if it's not file.
   // Regardless, net_GetFileFromURLSpec should be avoided
   // when the scheme isn't file.
   if (!scheme.EqualsLiteral("file"))
--- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@@ -755,18 +755,20 @@ nsIndexedToHTML::OnIndexAvailable(nsIReq
         loc.Append('/');
     }
 
     // now minimally re-escape the location...
     uint32_t escFlags;
     // for some protocols, we expect the location to be absolute.
     // if so, and if the location indeed appears to be a valid URI, then go
     // ahead and treat it like one.
+
+    nsAutoCString scheme;
     if (mExpectAbsLoc &&
-        NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
+        NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
         // escape as absolute 
         escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
     }
     else {
         // escape as relative
         // esc_Directory is needed because directories have a trailing slash.
         // Without it, the trailing '/' will be escaped, and links from within
         // that directory will be incorrect
--- a/netwerk/test/unit/test_standardurl.js
+++ b/netwerk/test/unit/test_standardurl.js
@@ -268,21 +268,28 @@ function test_percentDecoding()
   var url = stringToURL("http://%70%61%73%74%65%62%69%6E.com");
   do_check_eq(url.spec, "http://pastebin.com/");
 
   // We shouldn't unescape characters that are not allowed in the hostname.
   url = stringToURL("http://example.com%0a%23.google.com/");
   do_check_eq(url.spec, "http://example.com%0a%23.google.com/");
 }
 
+function test_filterWhitespace()
+{
+  var url = stringToURL(" \r\n\th\nt\rt\tp://ex\r\n\tample.com/path\r\n\t/\r\n\tto the/fil\r\n\te.e\r\n\txt?que\r\n\try#ha\r\n\tsh \r\n\t ");
+  do_check_eq(url.spec, "http://example.com/path/to%20the/file.ext?query#hash");
+}
+
 function run_test()
 {
   test_setEmptyPath();
   test_setQuery();
   test_setRef();
   test_ipv6();
   test_ipv6_fail();
   test_clearedSpec();
   test_escapeBrackets();
   test_apostropheEncoding();
   test_accentEncoding();
   test_percentDecoding();
+  test_filterWhitespace();
 }