Bug 1135022 - Optimize ChunkSet by storing ranges instead of numbers. r=gcp
MozReview-Commit-ID: 59TkB7ennpK
--- a/toolkit/components/url-classifier/ChunkSet.cpp
+++ b/toolkit/components/url-classifier/ChunkSet.cpp
@@ -3,96 +3,276 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "ChunkSet.h"
namespace mozilla {
namespace safebrowsing {
+const size_t ChunkSet::IO_BUFFER_SIZE;
+
nsresult
ChunkSet::Serialize(nsACString& aChunkStr)
{
aChunkStr.Truncate();
-
- uint32_t i = 0;
- while (i < mChunks.Length()) {
- if (i != 0) {
+ for (const Range* range = mRanges.begin(); range != mRanges.end(); range++) {
+ if (range != mRanges.begin()) {
aChunkStr.Append(',');
}
- aChunkStr.AppendInt((int32_t)mChunks[i]);
- uint32_t first = i;
- uint32_t last = first;
- i++;
- while (i < mChunks.Length() && (mChunks[i] == mChunks[i - 1] + 1 || mChunks[i] == mChunks[i - 1])) {
- last = i++;
- }
-
- if (last != first) {
+ aChunkStr.AppendInt((int32_t)range->Begin());
+ if (range->Begin() != range->End()) {
aChunkStr.Append('-');
- aChunkStr.AppendInt((int32_t)mChunks[last]);
+ aChunkStr.AppendInt((int32_t)range->End());
}
}
return NS_OK;
}
nsresult
ChunkSet::Set(uint32_t aChunk)
{
- size_t idx = mChunks.BinaryIndexOf(aChunk);
- if (idx == nsTArray<uint32_t>::NoIndex) {
- if (!mChunks.InsertElementSorted(aChunk, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
+ if (!Has(aChunk)) {
+ Range chunkRange(aChunk, aChunk);
+
+ if (mRanges.Length() == 0) {
+ if (!mRanges.AppendElement(chunkRange, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ return NS_OK;
+ }
+
+ if (mRanges.LastElement().Precedes(chunkRange)) {
+ mRanges.LastElement().End(aChunk);
+ } else if (chunkRange.Precedes(mRanges[0])) {
+ mRanges[0].Begin(aChunk);
+ } else {
+ ChunkSet tmp;
+ if (!tmp.mRanges.AppendElement(chunkRange, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ return Merge(tmp);
}
}
+
return NS_OK;
}
bool
ChunkSet::Has(uint32_t aChunk) const
{
- return mChunks.BinaryIndexOf(aChunk) != nsTArray<uint32_t>::NoIndex;
+ size_t idx;
+ return BinarySearchIf(mRanges, 0, mRanges.Length(),
+ Range::IntersectionComparator(Range(aChunk, aChunk)),
+ &idx);
+ // IntersectionComparator works because we create a
+ // single-chunk range.
}
nsresult
ChunkSet::Merge(const ChunkSet& aOther)
{
- const uint32_t *dupIter = aOther.mChunks.Elements();
- const uint32_t *end = aOther.mChunks.Elements() + aOther.mChunks.Length();
+ size_t oldLen = mRanges.Length();
+
+ for (const Range* mergeRange = aOther.mRanges.begin();
+ mergeRange != aOther.mRanges.end(); mergeRange++) {
+ if (!HasSubrange(*mergeRange)) {
+ if (!mRanges.InsertElementSorted(*mergeRange, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+ }
- for (const uint32_t *iter = dupIter; iter != end; iter++) {
- nsresult rv = Set(*iter);
- NS_ENSURE_SUCCESS(rv, rv);
+ if (oldLen < mRanges.Length()) {
+ for (size_t i = 1; i < mRanges.Length(); i++) {
+ while (mRanges[i - 1].FoldLeft(mRanges[i])) {
+ mRanges.RemoveElementAt(i);
+
+ if (i == mRanges.Length()) {
+ return NS_OK;
+ }
+ }
+ }
}
return NS_OK;
}
+uint32_t
+ChunkSet::Length() const
+{
+ uint32_t len = 0;
+ for (const Range* range = mRanges.begin(); range != mRanges.end(); range++) {
+ len += range->Length();
+ }
+
+ return len;
+}
+
nsresult
ChunkSet::Remove(const ChunkSet& aOther)
{
- uint32_t *addIter = mChunks.Elements();
- uint32_t *end = mChunks.Elements() + mChunks.Length();
+ for (const Range* removalRange = aOther.mRanges.begin();
+ removalRange != aOther.mRanges.end(); removalRange++) {
+
+ if (mRanges.Length() == 0) {
+ return NS_OK;
+ }
+
+ if (mRanges.LastElement().End() < removalRange->Begin() ||
+ aOther.mRanges.LastElement().End() < mRanges[0].Begin()) {
+ return NS_OK;
+ }
- for (uint32_t *iter = addIter; iter != end; iter++) {
- if (!aOther.Has(*iter)) {
- *addIter = *iter;
- addIter++;
+ size_t intersectionIdx;
+ while (BinarySearchIf(mRanges, 0, mRanges.Length(),
+ Range::IntersectionComparator(*removalRange), &intersectionIdx)) {
+
+ ChunkSet remains;
+ nsresult rv = mRanges[intersectionIdx].Remove(*removalRange, remains);
+
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ mRanges.RemoveElementAt(intersectionIdx);
+ if (!mRanges.InsertElementsAt(intersectionIdx, remains.mRanges,
+ fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
}
}
- if (!mChunks.SetLength(addIter - mChunks.Elements(), fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
-
return NS_OK;
}
void
ChunkSet::Clear()
{
- mChunks.Clear();
+ mRanges.Clear();
+}
+
+nsresult
+ChunkSet::Write(nsIOutputStream* aOut)
+{
+ nsTArray<uint32_t> chunks(IO_BUFFER_SIZE);
+
+ for (const Range* range = mRanges.begin(); range != mRanges.end(); range++) {
+ for (uint32_t chunk = range->Begin(); chunk <= range->End(); chunk++) {
+ chunks.AppendElement(chunk);
+
+ if (chunks.Length() == chunks.Capacity()) {
+ nsresult rv = WriteTArray(aOut, chunks);
+
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ chunks.Clear();
+ }
+ }
+ }
+
+ nsresult rv = WriteTArray(aOut, chunks);
+
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return NS_OK;
+}
+
+nsresult
+ChunkSet::Read(nsIInputStream* aIn, uint32_t aNumElements)
+{
+ nsTArray<uint32_t> chunks(IO_BUFFER_SIZE);
+
+ while (aNumElements != 0) {
+ chunks.Clear();
+
+ uint32_t numToRead = aNumElements > IO_BUFFER_SIZE ? IO_BUFFER_SIZE : aNumElements;
+
+ nsresult rv = ReadTArray(aIn, &chunks, numToRead);
+
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ aNumElements -= numToRead;
+
+ for (const uint32_t* c = chunks.begin(); c != chunks.end(); c++) {
+ rv = Set(*c);
+
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+ }
+ }
+
+ return NS_OK;
+}
+
+bool
+ChunkSet::HasSubrange(const Range& aSubrange) const
+{
+ for (const Range* range = mRanges.begin(); range != mRanges.end(); range++) {
+ if (range->Contains(aSubrange)) {
+ return true;
+ } else if (!(aSubrange.Begin() > range->End() ||
+ range->Begin() > aSubrange.End())) {
+ // In this case, aSubrange overlaps this range but is not a subrange.
+ // because the ChunkSet implementation ensures that there are no
+ // overlapping ranges, this means that aSubrange cannot be a subrange of
+ // any of the following ranges
+ return false;
+ }
+ }
+
+ return false;
+}
+
+uint32_t
+ChunkSet::Range::Length() const
+{
+ return mEnd - mBegin + 1;
+}
+
+nsresult
+ChunkSet::Range::Remove(const Range& aRange, ChunkSet& aRemainderSet) const
+{
+ if (mBegin < aRange.mBegin && aRange.mBegin <= mEnd) {
+ // aRange overlaps & follows this range
+ Range range(mBegin, aRange.mBegin - 1);
+ if (!aRemainderSet.mRanges.AppendElement(range, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ if (mBegin <= aRange.mEnd && aRange.mEnd < mEnd) {
+ // aRange overlaps & precedes this range
+ Range range(aRange.mEnd + 1, mEnd);
+ if (!aRemainderSet.mRanges.AppendElement(range, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ return NS_OK;
+}
+
+bool
+ChunkSet::Range::FoldLeft(const Range& aRange)
+{
+ if (Contains(aRange)) {
+ return true;
+ } else if (Precedes(aRange) ||
+ (mBegin <= aRange.mBegin && aRange.mBegin <= mEnd)) {
+ mEnd = aRange.mEnd;
+ return true;
+ }
+
+ return false;
}
} // namespace safebrowsing
} // namespace mozilla
--- a/toolkit/components/url-classifier/ChunkSet.h
+++ b/toolkit/components/url-classifier/ChunkSet.h
@@ -9,41 +9,91 @@
#include "Entries.h"
#include "nsString.h"
#include "nsTArray.h"
namespace mozilla {
namespace safebrowsing {
/**
- * Store the chunk numbers as an array of uint32_t. We need chunk numbers in
- * order to ask for incremental updates from the server.
- * XXX: We should optimize this further to compress the many consecutive
- * numbers.
+ * Store the chunk numbers as an array of ranges of uint32_t.
+ * We need chunk numbers in order to ask for incremental updates from the
+ * server.
*/
class ChunkSet {
public:
- ChunkSet() {}
- ~ChunkSet() {}
-
nsresult Serialize(nsACString& aStr);
nsresult Set(uint32_t aChunk);
bool Has(uint32_t chunk) const;
nsresult Merge(const ChunkSet& aOther);
- uint32_t Length() const { return mChunks.Length(); }
+ uint32_t Length() const;
nsresult Remove(const ChunkSet& aOther);
void Clear();
- nsresult Write(nsIOutputStream* aOut) {
- return WriteTArray(aOut, mChunks);
- }
- nsresult Read(nsIInputStream* aIn, uint32_t aNumElements) {
- return ReadTArray(aIn, &mChunks, aNumElements);
- }
+ nsresult Write(nsIOutputStream* aOut);
+ nsresult Read(nsIInputStream* aIn, uint32_t aNumElements);
private:
- FallibleTArray<uint32_t> mChunks;
+ class Range {
+ public:
+ Range(uint32_t aBegin, uint32_t aEnd) : mBegin(aBegin), mEnd(aEnd) {}
+
+ uint32_t Length() const;
+ nsresult Remove(const Range& aRange, ChunkSet& aRemainderSet) const;
+ bool FoldLeft(const Range& aRange);
+
+ bool operator==(const Range& rhs) const {
+ return mBegin == rhs.mBegin;
+ }
+ bool operator<(const Range& rhs) const {
+ return mBegin < rhs.mBegin;
+ }
+
+ uint32_t Begin() const {
+ return mBegin;
+ }
+ void Begin(const uint32_t aBegin) {
+ mBegin = aBegin;
+ }
+ uint32_t End() const {
+ return mEnd;
+ }
+ void End(const uint32_t aEnd) {
+ mEnd = aEnd;
+ }
+
+ bool Contains(const Range& aRange) const {
+ return mBegin <= aRange.mBegin && aRange.mEnd <= mEnd;
+ }
+ bool Precedes(const Range& aRange) const {
+ return mEnd + 1 == aRange.mBegin;
+ }
+
+ struct IntersectionComparator {
+ int operator()(const Range& aRange) const {
+ if (aRange.mBegin > mTarget.mEnd) {
+ return -1;
+ }
+ if (mTarget.mBegin > aRange.mEnd) {
+ return 1;
+ }
+ return 0;
+ }
+
+ explicit IntersectionComparator(const Range& aTarget) : mTarget(aTarget){}
+ const Range& mTarget;
+ };
+
+ private:
+ uint32_t mBegin;
+ uint32_t mEnd;
+ };
+
+ static const size_t IO_BUFFER_SIZE = 1024;
+ FallibleTArray<Range> mRanges;
+
+ bool HasSubrange(const Range& aSubrange) const;
};
} // namespace safebrowsing
} // namespace mozilla
#endif