Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab
* net_ExtractURLScheme now uses mozilla::Tokenizer
* net_FilterURIString also filters characters in the scheme now
* removed startPos and endPos parameters for net_FilterURIString and introduced net_IsAbsoluteURL
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@@ -559,17 +559,17 @@ nsIOService::GetProtocolHandler(const ch
return NS_ERROR_UNKNOWN_PROTOCOL;
return rv;
}
NS_IMETHODIMP
nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
{
- return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
+ return net_ExtractURLScheme(inURI, scheme);
}
NS_IMETHODIMP
nsIOService::GetProtocolFlags(const char* scheme, uint32_t *flags)
{
nsCOMPtr<nsIProtocolHandler> handler;
nsresult rv = GetProtocolHandler(scheme, getter_AddRefs(handler));
if (NS_FAILED(rv)) return rv;
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@@ -2828,30 +2828,18 @@ nsStandardURL::Init(uint32_t urlType,
IsUTFCharset(mOriginCharset.get())) {
mOriginCharset.Truncate();
}
}
else if (!IsUTFCharset(charset)) {
mOriginCharset = charset;
}
- if (baseURI) {
- uint32_t start, end;
- // pull out the scheme and where it ends
- nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
- if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
- nsACString::const_iterator slash;
- spec.BeginReading(slash);
- slash.advance(end+1);
- // then check if // follows
- // if it follows, aSpec is really absolute ...
- // ignore aBaseURI in this case
- if (*slash == '/' && *(++slash) == '/')
- baseURI = nullptr;
- }
+ if (baseURI && net_IsAbsoluteURL(spec)) {
+ baseURI = nullptr;
}
if (!baseURI)
return SetSpec(spec);
nsAutoCString buf;
nsresult rv = baseURI->Resolve(spec, buf);
if (NS_FAILED(rv)) return rv;
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -9,16 +9,17 @@
#include "nsURLHelper.h"
#include "nsIFile.h"
#include "nsIURLParser.h"
#include "nsCOMPtr.h"
#include "nsCRT.h"
#include "nsNetCID.h"
#include "mozilla/Preferences.h"
#include "prnetdb.h"
+#include "mozilla/Tokenizer.h"
using namespace mozilla;
//----------------------------------------------------------------------------
// Init/Shutdown
//----------------------------------------------------------------------------
static bool gInitialized = false;
@@ -174,22 +175,22 @@ net_ParseFileURL(const nsACString &inURL
}
outDirectory.Truncate();
outFileBaseName.Truncate();
outFileExtension.Truncate();
const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
const char *url = flatURL.get();
-
- uint32_t schemeBeg, schemeEnd;
- rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
+
+ nsAutoCString scheme;
+ rv = net_ExtractURLScheme(flatURL, scheme);
if (NS_FAILED(rv)) return rv;
- if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
+ if (!scheme.EqualsLiteral("file")) {
NS_ERROR("must be a file:// url");
return NS_ERROR_UNEXPECTED;
}
nsIURLParser *parser = net_GetNoAuthURLParser();
NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
@@ -478,67 +479,62 @@ net_ResolveRelativePath(const nsACString
result = path;
return NS_OK;
}
//----------------------------------------------------------------------------
// scheme fu
//----------------------------------------------------------------------------
+#if !defined(MOZILLA_XPCOMRT_API)
+static bool isAsciiAlpha(char c) {
+ return nsCRT::IsAsciiAlpha(c);
+}
+
+static bool
+net_IsValidSchemeChar(const char aChar)
+{
+ if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
+ aChar == '+' || aChar == '.' || aChar == '-') {
+ return true;
+ }
+ return false;
+}
+#endif
+
/* Extract URI-Scheme if possible */
nsresult
net_ExtractURLScheme(const nsACString &inURI,
- uint32_t *startPos,
- uint32_t *endPos,
- nsACString *scheme)
+ nsACString& scheme)
{
- // search for something up to a colon, and call it the scheme
- const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
- const char* uri_start = flatURI.get();
- const char* uri = uri_start;
+#if !defined(MOZILLA_XPCOMRT_API)
+ Tokenizer p(inURI, "\r\n\t");
- if (!uri)
- return NS_ERROR_MALFORMED_URI;
-
- // skip leading white space
- while (nsCRT::IsAsciiSpace(*uri))
- uri++;
-
- uint32_t start = uri - uri_start;
- if (startPos) {
- *startPos = start;
+ while (p.CheckWhite() || p.CheckChar(' ')) {
+ // Skip leading whitespace
}
- uint32_t length = 0;
- char c;
- while ((c = *uri++) != '\0') {
- // First char must be Alpha
- if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
- length++;
- }
- // Next chars can be alpha + digit + some special chars
- else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||
- nsCRT::IsAsciiDigit(c) || c == '+' ||
- c == '.' || c == '-')) {
- length++;
- }
- // stop if colon reached but not as first char
- else if (c == ':' && length > 0) {
- if (endPos) {
- *endPos = start + length;
- }
+ p.Record();
+ if (!p.CheckChar(isAsciiAlpha)) {
+ // First char must be alpha
+ return NS_ERROR_MALFORMED_URI;
+ }
- if (scheme)
- scheme->Assign(Substring(inURI, start, length));
- return NS_OK;
- }
- else
- break;
+ while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+ // Skip valid scheme characters or \r\n\t
}
- return NS_ERROR_MALFORMED_URI;
+
+ if (!p.CheckChar(':')) {
+ return NS_ERROR_MALFORMED_URI;
+ }
+
+ p.Claim(scheme);
+ scheme.StripChars("\r\n\t");
+#endif
+ return NS_OK;
}
bool
net_IsValidScheme(const char *scheme, uint32_t schemeLen)
{
// first char must be alpha
if (!nsCRT::IsAsciiAlpha(*scheme))
return false;
@@ -552,96 +548,82 @@ net_IsValidScheme(const char *scheme, ui
*scheme == '-'))
return false;
}
return true;
}
bool
+net_IsAbsoluteURL(const nsACString& uri)
+{
+#if !defined(MOZILLA_XPCOMRT_API)
+ Tokenizer p(uri, "\r\n\t");
+
+ while (p.CheckWhite() || p.CheckChar(' ')) {
+ // Skip leading whitespace
+ }
+
+ // First char must be alpha
+ if (!p.CheckChar(isAsciiAlpha)) {
+ return false;
+ }
+
+ while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+ // Skip valid scheme characters or \r\n\t
+ }
+ if (!p.CheckChar(':')) {
+ return false;
+ }
+ p.SkipWhites();
+
+ if (!p.CheckChar('/')) {
+ return false;
+ }
+ p.SkipWhites();
+
+ if (p.CheckChar('/')) {
+ // aSpec is really absolute. Ignore aBaseURI in this case
+ return true;
+ }
+#endif
+ return false;
+}
+
+bool
net_FilterURIString(const char *str, nsACString& result)
{
NS_PRECONDITION(str, "Must have a non-null string!");
- bool writing = false;
result.Truncate();
const char *p = str;
- // Remove leading spaces, tabs, CR, LF if any.
- while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
- writing = true;
- str = p + 1;
+ // Figure out if we need to filter anything.
+ bool writing = false;
+ while (*p) {
+ if (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
+ writing = true;
+ break;
+ }
p++;
}
- // Don't strip from the scheme, because other code assumes everything
- // up to the ':' is the scheme, and it's bad not to have it match.
- // If there's no ':', strip.
- bool found_colon = false;
- const char *first = nullptr;
- while (*p) {
- switch (*p) {
- case '\t':
- case '\r':
- case '\n':
- if (found_colon) {
- writing = true;
- // append chars up to but not including *p
- if (p > str)
- result.Append(str, p - str);
- str = p + 1;
- } else {
- // remember where the first \t\r\n was in case we find no scheme
- if (!first)
- first = p;
- }
- break;
-
- case ':':
- found_colon = true;
- break;
-
- case '/':
- case '@':
- if (!found_colon) {
- // colon also has to precede / or @ to be a scheme
- found_colon = true; // not really, but means ok to strip
- if (first) {
- // go back and replace
- p = first;
- continue; // process *p again
- }
- }
- break;
-
- default:
- break;
- }
- p++;
-
- // At end, if there was no scheme, and we hit a control char, fix
- // it up now.
- if (!*p && first != nullptr && !found_colon) {
- // TRICKY - to avoid duplicating code, we reset the loop back
- // to the point we found something to do
- p = first;
- // This also stops us from looping after we finish
- found_colon = true; // so we'll replace \t\r\n
- }
+ if (!writing) {
+ // Nothing to strip or filter
+ return false;
}
- // Remove trailing spaces if any
- while (((p-1) >= str) && (*(p-1) == ' ')) {
- writing = true;
- p--;
- }
+ nsAutoCString temp;
- if (writing && p > str)
- result.Append(str, p - str);
+ temp.Assign(str);
+ temp.Trim("\r\n\t ");
+ temp.StripChars("\r\n\t");
- return writing;
+ result.Assign(temp);
+
+ return true;
}
#if defined(XP_WIN)
bool
net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
{
bool writing = false;
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@@ -75,27 +75,31 @@ void net_CoalesceDirs(netCoalesceFlags f
*
* @return a new string, representing canonical uri
*/
nsresult net_ResolveRelativePath(const nsACString &relativePath,
const nsACString &basePath,
nsACString &result);
/**
+ * Check if a URL is absolute
+ *
+ * @param inURL URL spec
+ * @return true if the given spec represents an absolute URL
+ */
+bool net_IsAbsoluteURL(const nsACString& inURL);
+
+/**
* Extract URI-Scheme if possible
*
* @param inURI URI spec
- * @param startPos start of scheme (may be null)
- * @param endPos end of scheme; index of colon (may be null)
* @param scheme scheme copied to this buffer on return (may be null)
*/
nsresult net_ExtractURLScheme(const nsACString &inURI,
- uint32_t *startPos,
- uint32_t *endPos,
- nsACString *scheme = nullptr);
+ nsACString &scheme);
/* check that the given scheme conforms to RFC 2396 */
bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
inline bool net_IsValidScheme(const nsAFlatCString &scheme)
{
return net_IsValidScheme(scheme.get(), scheme.Length());
}
@@ -104,18 +108,17 @@ inline bool net_IsValidScheme(const nsAF
* Filter out whitespace from a URI string. The input is the |str|
* pointer. |result| is written to if and only if there is whitespace that has
* to be filtered out. The return value is true if and only if |result| is
* written to.
*
* This function strips out all whitespace at the beginning and end of the URL
* and strips out \r, \n, \t from the middle of the URL. This makes it safe to
* call on things like javascript: urls or data: urls, where we may in fact run
- * into whitespace that is not properly encoded. Note that stripping does not
- * occur in the scheme portion itself.
+ * into whitespace that is not properly encoded.
*
* @param str the pointer to the string to filter. Must be non-null.
* @param result the out param to write to if filtering happens
* @return whether result was written to
*/
bool net_FilterURIString(const char *str, nsACString& result);
#if defined(XP_WIN)
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@@ -339,17 +339,17 @@ Http2Stream::WriteSegments(nsAHttpSegmen
mSegmentWriter = nullptr;
return rv;
}
nsresult
Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
{
nsAutoCString scheme;
- nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
+ nsresult rv = net_ExtractURLScheme(origin, scheme);
NS_ENSURE_SUCCESS(rv, rv);
return MakeOriginURL(scheme, origin, url);
}
nsresult
Http2Stream::MakeOriginURL(const nsACString &scheme, const nsACString &origin,
RefPtr<nsStandardURL> &url)
{
--- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
+++ b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
@@ -49,17 +49,17 @@ SubstitutingURL::EnsureFile()
MOZ_ASSERT(substHandler);
nsAutoCString spec;
rv = substHandler->ResolveURI(this, spec);
if (NS_FAILED(rv))
return rv;
nsAutoCString scheme;
- rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
+ rv = net_ExtractURLScheme(spec, scheme);
if (NS_FAILED(rv))
return rv;
// Bug 585869:
// In most cases, the scheme is jar if it's not file.
// Regardless, net_GetFileFromURLSpec should be avoided
// when the scheme isn't file.
if (!scheme.EqualsLiteral("file"))
--- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@@ -755,18 +755,20 @@ nsIndexedToHTML::OnIndexAvailable(nsIReq
loc.Append('/');
}
// now minimally re-escape the location...
uint32_t escFlags;
// for some protocols, we expect the location to be absolute.
// if so, and if the location indeed appears to be a valid URI, then go
// ahead and treat it like one.
+
+ nsAutoCString scheme;
if (mExpectAbsLoc &&
- NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
+ NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
// escape as absolute
escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
}
else {
// escape as relative
// esc_Directory is needed because directories have a trailing slash.
// Without it, the trailing '/' will be escaped, and links from within
// that directory will be incorrect
--- a/netwerk/test/unit/test_standardurl.js
+++ b/netwerk/test/unit/test_standardurl.js
@@ -268,21 +268,28 @@ function test_percentDecoding()
var url = stringToURL("http://%70%61%73%74%65%62%69%6E.com");
do_check_eq(url.spec, "http://pastebin.com/");
// We shouldn't unescape characters that are not allowed in the hostname.
url = stringToURL("http://example.com%0a%23.google.com/");
do_check_eq(url.spec, "http://example.com%0a%23.google.com/");
}
+function test_filterWhitespace()
+{
+ var url = stringToURL(" \r\n\th\nt\rt\tp://ex\r\n\tample.com/path\r\n\t/\r\n\tto the/fil\r\n\te.e\r\n\txt?que\r\n\try#ha\r\n\tsh \r\n\t ");
+ do_check_eq(url.spec, "http://example.com/path/to%20the/file.ext?query#hash");
+}
+
function run_test()
{
test_setEmptyPath();
test_setQuery();
test_setRef();
test_ipv6();
test_ipv6_fail();
test_clearedSpec();
test_escapeBrackets();
test_apostropheEncoding();
test_accentEncoding();
test_percentDecoding();
+ test_filterWhitespace();
}