Bug 1369317 - Filter and escape URI string in only one pass r=mcmanus
MozReview-Commit-ID: KU4C4cS3jZC
--- a/netwerk/base/nsSimpleURI.cpp
+++ b/netwerk/base/nsSimpleURI.cpp
@@ -292,25 +292,20 @@ nsSimpleURI::SetSpec(const nsACString &a
NS_ENSURE_STATE(mMutable);
nsresult rv = net_ExtractURLScheme(aSpec, mScheme);
if (NS_FAILED(rv)) {
return rv;
}
ToLowerCase(mScheme);
- // filter out unexpected chars "\r\n\t" if necessary
- nsAutoCString filteredSpec;
- net_FilterURIString(aSpec, filteredSpec);
-
- // nsSimpleURI currently restricts the charset to US-ASCII
nsAutoCString spec;
- rv = NS_EscapeURL(filteredSpec, esc_OnlyNonASCII, spec, fallible);
+ rv = net_FilterAndEscapeURI(aSpec, esc_OnlyNonASCII, spec);
if (NS_FAILED(rv)) {
- return rv;
+ return rv;
}
int32_t colonPos = spec.FindChar(':');
MOZ_ASSERT(colonPos != kNotFound, "A colon should be in this string");
// This sets mPath, mQuery and mRef.
return SetPathQueryRefEscaped(Substring(spec, colonPos + 1),
/* needsEscape = */ false);
}
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@@ -14,16 +14,17 @@
#include "nsIFile.h"
#include "nsIURLParser.h"
#include "nsCOMPtr.h"
#include "nsCRT.h"
#include "nsNetCID.h"
#include "mozilla/Preferences.h"
#include "prnetdb.h"
#include "mozilla/Tokenizer.h"
+#include "nsEscape.h"
using namespace mozilla;
//----------------------------------------------------------------------------
// Init/Shutdown
//----------------------------------------------------------------------------
static bool gInitialized = false;
@@ -623,16 +624,36 @@ net_FilterURIString(const nsACString& in
}
result.Assign(Substring(newStart, newEnd));
if (needsStrip) {
result.StripTaggedASCII(mask);
}
}
+nsresult
+net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult)
+{
+ aResult.Truncate();
+
+ auto start = aInput.BeginReading();
+ auto end = aInput.EndReading();
+
+ // Trim off leading and trailing control characters and spaces (<= 0x20).
+ auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
+ auto newStart = std::find_if(start, end, charFilter);
+ auto newEnd = std::find_if(
+ std::reverse_iterator<decltype(end)>(end),
+ std::reverse_iterator<decltype(newStart)>(newStart),
+ charFilter).base();
+
+ const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab();
+ return NS_EscapeAndFilterURL(Substring(newStart, newEnd), aFlags,
+ &mask, aResult, fallible);
+}
#if defined(XP_WIN)
bool
net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
{
bool writing = false;
nsACString::const_iterator beginIter, endIter;
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@@ -110,16 +110,28 @@ inline bool net_IsValidScheme(const nsCS
* it safe to call on things like javascript: urls or data: urls, where we may
* in fact run into whitespace that is not properly encoded.
*
* @param input the URL spec we want to filter
* @param result the out param to write to if filtering happens
*/
void net_FilterURIString(const nsACString& input, nsACString& result);
+/**
+ * This function performs character stripping just like net_FilterURIString,
+ * with the added benefit of also performing percent escaping of disallowed
+ * characters, all in one pass. Saving one pass is very important when operating
+ * on really large strings.
+ *
+ * @param aInput the URL spec we want to filter
+ * @param aFlags the flags which control which characters we escape
+ * @param aResult the out param that receives the filtered and escaped spec
+ */
+nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags, nsACString& aResult);
+
#if defined(XP_WIN)
/**
* On Win32 and OS/2 system's a back-slash in a file:// URL is equivalent to a
* forward-slash. This function maps any back-slashes to forward-slashes.
*
* @param aURL
* The URL string to normalize (UTF-8 encoded). This can be a
* relative URL segment.
--- a/xpcom/io/nsEscape.cpp
+++ b/xpcom/io/nsEscape.cpp
@@ -6,16 +6,17 @@
#include "nsEscape.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/BinarySearch.h"
#include "nsTArray.h"
#include "nsCRT.h"
#include "plstr.h"
+#include "nsASCIIMask.h"
static const char hexCharsUpper[] = "0123456789ABCDEF";
static const char hexCharsUpperLower[] = "0123456789ABCDEFabcdef";
static const int netCharType[256] =
/* Bit 0 xalpha -- the alphas
** Bit 1 xpalpha -- as xalpha but
** converts spaces to plus and plus to %2B
@@ -377,17 +378,18 @@ static uint16_t dontNeedEscape(uint16_t
* @param aResult String that has the URL escaped portion appended to. Only
* altered if the string is URL escaped or |esc_AlwaysCopy| is specified.
* @param aDidAppend Indicates whether or not data was appended to |aResult|.
* @return NS_ERROR_INVALID_ARG, NS_ERROR_OUT_OF_MEMORY on failure.
*/
template<class T>
static nsresult
T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
- uint32_t aFlags, T& aResult, bool& aDidAppend)
+ uint32_t aFlags, const ASCIIMaskArray* aFilterMask,
+ T& aResult, bool& aDidAppend)
{
typedef nsCharTraits<typename T::char_type> traits;
typedef typename traits::unsigned_char_type unsigned_char_type;
static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
"unexpected char type");
if (!aPart) {
NS_NOTREACHED("null pointer");
@@ -404,16 +406,29 @@ T_EscapeURL(const typename T::char_type*
typename T::char_type tempBuffer[100];
unsigned int tempBufferPos = 0;
bool previousIsNonASCII = false;
for (size_t i = 0; i < aPartLen; ++i) {
unsigned_char_type c = *src++;
+ // If there is a filter, skip (drop) any characters which match it.
+ // Doing it here avoids an extra pass over the string just to strip the
+ // filtered characters.
+ if (aFilterMask && ASCIIMask::IsMasked(*aFilterMask, c)) {
+ if (!writing) {
+ if (!aResult.Append(aPart, i, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ writing = true;
+ }
+ continue;
+ }
+
// if the char has not to be escaped or whatever follows % is
// a valid escaped string, just copy the char.
//
// Also the % will not be escaped until forced
// See bugzilla bug 61269 for details why we changed this
//
// And, we will not escape non-ascii characters if requested.
// On special request we will also escape the colon even when
@@ -469,47 +484,66 @@ bool
NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
nsACString& aResult)
{
if (aPartLen < 0) {
aPartLen = strlen(aPart);
}
bool result = false;
- nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, aResult, result);
+ nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, nullptr, aResult, result);
if (NS_FAILED(rv)) {
::NS_ABORT_OOM(aResult.Length() * sizeof(nsACString::char_type));
}
return result;
}
nsresult
NS_EscapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult,
const mozilla::fallible_t&)
{
bool appended = false;
- nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult, appended);
+ nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, nullptr, aResult, appended);
+ if (NS_FAILED(rv)) {
+ aResult.Truncate();
+ return rv;
+ }
+
+ if (!appended) {
+ aResult = aStr;
+ }
+
+ return rv;
+}
+
+nsresult
+NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
+ const ASCIIMaskArray* aFilterMask,
+ nsACString& aResult, const mozilla::fallible_t&)
+{
+ bool appended = false;
+ nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aFilterMask, aResult, appended);
if (NS_FAILED(rv)) {
aResult.Truncate();
return rv;
}
if (!appended) {
aResult = aStr;
}
return rv;
}
const nsAString&
NS_EscapeURL(const nsAString& aStr, uint32_t aFlags, nsAString& aResult)
{
bool result = false;
- nsresult rv = T_EscapeURL<nsAString>(aStr.Data(), aStr.Length(), aFlags, aResult, result);
+ nsresult rv = T_EscapeURL<nsAString>(aStr.Data(), aStr.Length(), aFlags, nullptr, aResult, result);
if (NS_FAILED(rv)) {
::NS_ABORT_OOM(aResult.Length() * sizeof(nsAString::char_type));
}
if (result) {
return aResult;
}
--- a/xpcom/io/nsEscape.h
+++ b/xpcom/io/nsEscape.h
@@ -164,16 +164,29 @@ NS_EscapeURL(const nsACString& aStr, uin
/**
* Fallible version of NS_EscapeURL. On success aResult will point to either
* the original string or an escaped copy.
*/
nsresult
NS_EscapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult,
const mozilla::fallible_t&);
+// Forward declaration for nsASCIIMask.h
+typedef std::array<bool, 128> ASCIIMaskArray;
+
+/**
+ * The same as NS_EscapeURL, except it also filters out characters that match
+ * aFilterMask.
+ */
+nsresult
+NS_EscapeAndFilterURL(const nsACString& aStr, uint32_t aFlags,
+ const ASCIIMaskArray* aFilterMask,
+ nsACString& aResult, const mozilla::fallible_t&);
+
+
inline const nsACString&
NS_UnescapeURL(const nsACString& aStr, uint32_t aFlags, nsACString& aResult)
{
if (NS_UnescapeURL(aStr.Data(), aStr.Length(), aFlags, aResult)) {
return aResult;
}
return aStr;
}