Bug 1364974: Part 5 - Perform off-thread decode operations in chunks, rather than singly. r?shu
MozReview-Commit-ID: DapDuQ8rdTI
--- a/js/xpconnect/loader/ScriptPreloader.cpp
+++ b/js/xpconnect/loader/ScriptPreloader.cpp
@@ -262,27 +262,24 @@ ScriptPreloader::Cleanup()
JS_RemoveExtraGCRootsTracer(jsapi.cx(), TraceOp, this);
UnregisterWeakMemoryReporter(this);
}
void
ScriptPreloader::FlushCache()
{
+ mMonitor.AssertNotCurrentThreadOwns();
MonitorAutoLock mal(mMonitor);
- for (auto& script : IterHash(mScripts)) {
- // We can only purge finished scripts here. Async scripts that are
- // still being parsed off-thread have a non-refcounted reference to
- // this script, which needs to stay alive until they finish parsing.
- if (script->mReadyToExecute) {
- script->Cancel();
- script.Remove();
- }
- }
+ mParsingScripts.clearAndFree();
+ while (auto script = mPendingScripts.getFirst())
+ script->remove();
+ for (auto& script : IterHash(mScripts))
+ script.Remove();
// If we've already finished saving the cache at this point, start a new
// delayed save operation. This will write out an empty cache file in place
// of any cache file we've already written out this session, which will
// prevent us from falling back to the current session's cache file on the
// next startup.
if (mSaveComplete && mChildCache) {
mSaveComplete = false;
@@ -428,23 +425,23 @@ ScriptPreloader::InitCacheInternal()
headerSize = LittleEndian::readUint32(data.get());
data += sizeof(headerSize);
if (data + headerSize > end) {
return Err(NS_ERROR_UNEXPECTED);
}
- AutoTArray<CachedScript*, 256> scripts;
-
{
auto cleanup = MakeScopeExit([&] () {
mScripts.Clear();
});
+ LinkedList<CachedScript> scripts;
+
Range<uint8_t> header(data, data + headerSize);
data += headerSize;
InputBuffer buf(header);
size_t offset = 0;
while (!buf.finished()) {
auto script = MakeUnique<CachedScript>(*this, buf);
@@ -459,51 +456,37 @@ ScriptPreloader::InitCacheInternal()
// size, as a basic sanity check.
if (script->mOffset != offset) {
return Err(NS_ERROR_UNEXPECTED);
}
offset += script->mSize;
script->mXDRRange.emplace(scriptData, scriptData + script->mSize);
- scripts.AppendElement(script.get());
+ // Don't pre-decode the script unless it was used in this process type during the
+ // previous session.
+ if (script->mOriginalProcessTypes.contains(CurrentProcessType())) {
+ scripts.insertBack(script.get());
+ } else {
+ script->mReadyToExecute = true;
+ }
+
mScripts.Put(script->mCachePath, script.get());
Unused << script.release();
}
if (buf.error()) {
return Err(NS_ERROR_UNEXPECTED);
}
+ mPendingScripts = Move(scripts);
cleanup.release();
}
- AutoJSAPI jsapi;
- MOZ_RELEASE_ASSERT(jsapi.Init(xpc::CompilationScope()));
- JSContext* cx = jsapi.cx();
-
- auto start = TimeStamp::Now();
- LOG(Info, "Off-thread decoding scripts...\n");
-
- JS::CompileOptions options(cx, JSVERSION_LATEST);
-
- for (auto& script : scripts) {
- // Only async decode scripts which have been used in this process type.
- if (script->mProcessTypes.contains(CurrentProcessType()) &&
- script->AsyncDecodable() &&
- JS::CanCompileOffThread(cx, options, script->mSize)) {
- DecodeScriptOffThread(cx, script);
- } else {
- script->mReadyToExecute = true;
- }
- }
-
- LOG(Info, "Initialized decoding in %fms\n",
- (TimeStamp::Now() - start).ToMilliseconds());
-
+ DecodeNextBatch(OFF_THREAD_FIRST_CHUNK_SIZE);
return Ok();
}
static inline Result<Ok, nsresult>
Write(PRFileDesc* fd, const void* data, int32_t len)
{
if (PR_Write(fd, data, len) != len) {
return Err(NS_ERROR_FAILURE);
@@ -734,86 +717,206 @@ ScriptPreloader::GetCachedScript(JSConte
}
return nullptr;
}
JSScript*
ScriptPreloader::WaitForCachedScript(JSContext* cx, CachedScript* script)
{
+ // Check for finished operations before locking so that we can move on to
+ // decoding the next batch as soon as possible after the pending batch is
+ // ready. If we wait until we hit an unfinished script, we wind up having at
+ // most one batch of buffered scripts, and occasionally under-running that
+ // buffer.
+ if (mToken) {
+ FinishOffThreadDecode();
+ }
+
if (!script->mReadyToExecute) {
LOG(Info, "Must wait for async script load: %s\n", script->mURL.get());
auto start = TimeStamp::Now();
+ mMonitor.AssertNotCurrentThreadOwns();
MonitorAutoLock mal(mMonitor);
+ // Check for finished operations again *after* locking, or we may race
+ // against mToken being set between our last check and the time we
+ // entered the mutex.
+ FinishOffThreadDecode();
+
if (!script->mReadyToExecute && script->mSize < MAX_MAINTHREAD_DECODE_SIZE) {
LOG(Info, "Script is small enough to recompile on main thread\n");
script->mReadyToExecute = true;
} else {
while (!script->mReadyToExecute) {
mal.Wait();
+
+ MonitorAutoUnlock mau(mMonitor);
+ FinishOffThreadDecode();
}
}
- LOG(Info, "Waited %fms\n", (TimeStamp::Now() - start).ToMilliseconds());
+ LOG(Debug, "Waited %fms\n", (TimeStamp::Now() - start).ToMilliseconds());
}
return script->GetJSScript(cx);
}
+
+/* static */ void
+ScriptPreloader::OffThreadDecodeCallback(void* token, void* context)
+{
+ auto cache = static_cast<ScriptPreloader*>(context);
+
+ cache->mMonitor.AssertNotCurrentThreadOwns();
+ MonitorAutoLock mal(cache->mMonitor);
+
+ // First notify any tasks that are already waiting on scripts, since they'll
+ // be blocking the main thread, which prevents any runnables from executing.
+ cache->mToken = token;
+ mal.NotifyAll();
+
+ // If nothing processed the token, and we don't already have a pending
+ // runnable, then dispatch a new one to finish the processing on the main
+ // thread as soon as possible.
+ if (cache->mToken && !cache->mFinishDecodeRunnablePending) {
+ cache->mFinishDecodeRunnablePending = true;
+ NS_DispatchToMainThread(
+ NewRunnableMethod(cache, &ScriptPreloader::DoFinishOffThreadDecode));
+ }
+}
+
void
-ScriptPreloader::DecodeScriptOffThread(JSContext* cx, CachedScript* script)
+ScriptPreloader::DoFinishOffThreadDecode()
{
- JS::CompileOptions options(cx, JSVERSION_LATEST);
+ mFinishDecodeRunnablePending = false;
+ FinishOffThreadDecode();
+}
- options.setNoScriptRval(true)
- .setFileAndLine(script->mURL.get(), 1);
+void
+ScriptPreloader::FinishOffThreadDecode()
+{
+ if (!mToken) {
+ return;
+ }
+
+ auto cleanup = MakeScopeExit([&] () {
+ mToken = nullptr;
+ mParsingSources.clear();
+ mParsingScripts.clear();
+
+ DecodeNextBatch();
+ });
+
+ AutoJSAPI jsapi;
+ MOZ_RELEASE_ASSERT(jsapi.Init(xpc::CompilationScope()));
- if (!JS::DecodeOffThreadScript(cx, options, script->Range(),
- OffThreadDecodeCallback,
- static_cast<void*>(script))) {
+ JSContext* cx = jsapi.cx();
+ JS::Rooted<JS::ScriptVector> jsScripts(cx, JS::ScriptVector(cx));
+
+ // If this fails, we still need to mark the scripts as finished. Any that
+ // weren't successfully compiled in this operation (which should never
+ // happen under ordinary circumstances) will be re-decoded on the main
+ // thread, and raise the appropriate errors when they're executed.
+ //
+ // The exception from the off-thread decode operation will be reported when
+ // we pop the AutoJSAPI off the stack.
+ Unused << JS::FinishOffThreadScriptsDecoder(cx, mToken, &jsScripts);
+
+ unsigned i = 0;
+ for (auto script : mParsingScripts) {
+ LOG(Debug, "Finished off-thread decode of %s\n", script->mURL.get());
+ if (i < jsScripts.length())
+ script->mScript = jsScripts[i++];
script->mReadyToExecute = true;
}
}
void
-ScriptPreloader::CancelOffThreadParse(void* token)
+ScriptPreloader::DecodeNextBatch(size_t chunkSize)
{
- AutoSafeJSAPI jsapi;
- JS::CancelOffThreadScriptDecoder(jsapi.cx(), token);
-}
+ MOZ_ASSERT(mParsingSources.length() == 0);
+ MOZ_ASSERT(mParsingScripts.length() == 0);
+
+ auto cleanup = MakeScopeExit([&] () {
+ mParsingScripts.clearAndFree();
+ mParsingSources.clearAndFree();
+ });
-/* static */ void
-ScriptPreloader::OffThreadDecodeCallback(void* token, void* context)
-{
- auto script = static_cast<CachedScript*>(context);
+ auto start = TimeStamp::Now();
+ LOG(Debug, "Off-thread decoding scripts...\n");
+
+ size_t size = 0;
+ for (CachedScript* next = mPendingScripts.getFirst(); next;) {
+ auto script = next;
+ next = script->getNext();
- MonitorAutoLock mal(script->mCache.mMonitor);
+ // Skip any scripts that we decoded on the main thread rather than
+ // waiting for an off-thread operation to complete.
+ if (script->mReadyToExecute) {
+ script->remove();
+ continue;
+ }
+ // If we have enough data for one chunk and this script would put us
+ // over our chunk size limit, we're done.
+ if (size > MIN_OFF_THREAD_DECODE_SIZE &&
+ size + script->mSize > chunkSize) {
+ break;
+ }
+ if (!mParsingScripts.append(script) ||
+ !mParsingSources.emplaceBack(script->Range(), script->mURL.get(), 0)) {
+ break;
+ }
- if (script->mReadyToExecute) {
- // We've already executed this script on the main thread, and opted to
- // main thread decode it rather waiting for off-thread decoding to
- // finish. So just cancel the off-thread parse rather than completing
- // it.
- NS_DispatchToMainThread(
- NewRunnableMethod<void*>(&script->mCache,
- &ScriptPreloader::CancelOffThreadParse,
- token));
+ LOG(Debug, "Beginning off-thread decode of script %s (%u bytes)\n",
+ script->mURL.get(), script->mSize);
+
+ script->remove();
+ size += script->mSize;
+ }
+
+ if (size == 0 && mPendingScripts.isEmpty()) {
return;
}
- script->mToken = token;
- script->mReadyToExecute = true;
+ AutoJSAPI jsapi;
+ MOZ_RELEASE_ASSERT(jsapi.Init(xpc::CompilationScope()));
+ JSContext* cx = jsapi.cx();
+
+ JS::CompileOptions options(cx, JSVERSION_LATEST);
+ options.setNoScriptRval(true);
- mal.NotifyAll();
+ if (!JS::CanCompileOffThread(cx, options, size) ||
+ !JS::DecodeOffThreadScripts(cx, options, mParsingSources,
+ OffThreadDecodeCallback,
+ static_cast<void*>(this))) {
+ // If we fail here, we don't move on to process the next batch, so make
+ // sure we don't have any other scripts left to process.
+ MOZ_ASSERT(mPendingScripts.isEmpty());
+ for (auto script : mPendingScripts) {
+ script->mReadyToExecute = true;
+ }
+
+ LOG(Info, "Can't decode %lu bytes of scripts off-thread", size);
+ for (auto script : mParsingScripts) {
+ script->mReadyToExecute = true;
+ }
+ return;
+ }
+
+ cleanup.release();
+
+ LOG(Debug, "Initialized decoding of %lu scripts (%lu bytes) in %fms\n",
+ mParsingSources.length(), size, (TimeStamp::Now() - start).ToMilliseconds());
}
+
ScriptPreloader::CachedScript::CachedScript(ScriptPreloader& cache, InputBuffer& buf)
: mCache(cache)
{
Code(buf);
mOriginalProcessTypes = mProcessTypes;
mProcessTypes = {};
}
@@ -829,67 +932,45 @@ ScriptPreloader::CachedScript::XDREncode
if (code == JS::TranscodeResult_Ok) {
mXDRRange.emplace(Buffer().begin(), Buffer().length());
return true;
}
JS_ClearPendingException(cx);
return false;
}
-void
-ScriptPreloader::CachedScript::Cancel()
-{
- if (mToken) {
- mCache.mMonitor.AssertCurrentThreadOwns();
-
- AutoSafeJSAPI jsapi;
- JS::CancelOffThreadScriptDecoder(jsapi.cx(), mToken);
-
- mReadyToExecute = true;
- mToken = nullptr;
- }
-}
-
JSScript*
ScriptPreloader::CachedScript::GetJSScript(JSContext* cx)
{
MOZ_ASSERT(mReadyToExecute);
if (mScript) {
return mScript;
}
- // If we have no token at this point, the script was too small to decode
+ // If we have no script at this point, the script was too small to decode
// off-thread, or it was needed before the off-thread compilation was
// finished, and is small enough to decode on the main thread rather than
// wait for the off-thread decoding to finish. In either case, we decode
// it synchronously the first time it's needed.
- if (!mToken) {
- MOZ_ASSERT(HasRange());
+ MOZ_ASSERT(HasRange());
- JS::RootedScript script(cx);
- if (JS::DecodeScript(cx, Range(), &script)) {
- mScript = script;
+ auto start = TimeStamp::Now();
+ LOG(Info, "Decoding script %s on main thread...\n", mURL.get());
- if (mCache.mSaveComplete) {
- FreeData();
- }
+ JS::RootedScript script(cx);
+ if (JS::DecodeScript(cx, Range(), &script)) {
+ mScript = script;
+
+ if (mCache.mSaveComplete) {
+ FreeData();
}
-
- return mScript;
}
- Maybe<JSAutoCompartment> ac;
- if (JS::CompartmentCreationOptionsRef(cx).addonIdOrNull()) {
- // Make sure we never try to finish the parse in a compartment with an
- // add-on ID, it wasn't started in one.
- ac.emplace(cx, xpc::CompilationScope());
- }
+ LOG(Debug, "Finished decoding in %fms", (TimeStamp::Now() - start).ToMilliseconds());
- mScript = JS::FinishOffThreadScriptDecoder(cx, mToken);
- mToken = nullptr;
return mScript;
}
NS_IMPL_ISUPPORTS(ScriptPreloader, nsIObserver, nsIRunnable, nsIMemoryReporter)
#undef LOG
} // namespace mozilla
--- a/js/xpconnect/loader/ScriptPreloader.h
+++ b/js/xpconnect/loader/ScriptPreloader.h
@@ -132,17 +132,17 @@ private:
// file. In this case, mReadyToExecute is true, and mScript is non-null.
//
// A script to be added to the next session's cache file always has a
// non-null mScript value. If it was read from the last session's cache
// file, it also has a non-empty mXDRRange range, which will be stored in
// the next session's cache file. If it was compiled in this session, its
// mXDRRange will initially be empty, and its mXDRData buffer will be
// populated just before it is written to the cache file.
- class CachedScript
+ class CachedScript : public LinkedListElement<CachedScript>
{
public:
CachedScript(CachedScript&&) = default;
CachedScript(ScriptPreloader& cache, const nsCString& url, const nsCString& cachePath, JSScript* script)
: mCache(cache)
, mURL(url)
, mCachePath(cachePath)
@@ -156,55 +156,44 @@ private:
ScriptStatus Status() const
{
return mProcessTypes.isEmpty() ? ScriptStatus::Restored : ScriptStatus::Saved;
}
// For use with nsTArray::Sort.
//
- // Orders scripts by:
- //
- // 1) Async-decoded scripts before sync-decoded scripts, since the
- // former are needed immediately at startup, and should be stored
- // contiguously.
- // 2) Script load time, so that scripts which are needed earlier are
- // stored earlier, and scripts needed at approximately the same
- // time are stored approximately contiguously.
+ // Orders scripts by script load time, so that scripts which are needed
+ // earlier are stored earlier, and scripts needed at approximately the
+ // same time are stored approximately contiguously.
struct Comparator
{
bool Equals(const CachedScript* a, const CachedScript* b) const
{
- return (a->AsyncDecodable() == b->AsyncDecodable() &&
- a->mLoadTime == b->mLoadTime);
+ return a->mLoadTime == b->mLoadTime;
}
bool LessThan(const CachedScript* a, const CachedScript* b) const
{
- if (a->AsyncDecodable() != b->AsyncDecodable()) {
- return a->AsyncDecodable();
- }
return a->mLoadTime < b->mLoadTime;
}
};
struct StatusMatcher final : public Matcher<CachedScript*>
{
explicit StatusMatcher(ScriptStatus status) : mStatus(status) {}
virtual bool Matches(CachedScript* script)
{
return script->Status() == mStatus;
}
const ScriptStatus mStatus;
};
- void Cancel();
-
void FreeData()
{
// If the script data isn't mmapped, we need to release both it
// and the Range that points to it at the same time.
if (!mXDRData.empty()) {
mXDRRange.reset();
mXDRData.destroy();
}
@@ -212,18 +201,16 @@ private:
void UpdateLoadTime(const TimeStamp& loadTime)
{
if (mLoadTime.IsNull() || loadTime < mLoadTime) {
mLoadTime = loadTime;
}
}
- bool AsyncDecodable() const { return mSize > MIN_OFFTHREAD_SIZE; }
-
// Encodes this script into XDR data, and stores the result in mXDRData.
// Returns true on success, false on failure.
bool XDREncode(JSContext* cx);
// Encodes or decodes this script, in the storage format required by the
// script cache file.
template<typename Buffer>
void Code(Buffer& buffer)
@@ -301,20 +288,16 @@ private:
JS::Heap<JSScript*> mScript;
// True if this script is ready to be executed. This means that either the
// off-thread portion of an off-thread decode has finished, or the script
// is too small to be decoded off-thread, and may be immediately decoded
// whenever it is first executed.
bool mReadyToExecute = false;
- // The off-thread decode token for a completed off-thread decode, which
- // has not yet been finalized on the main thread.
- void* mToken = nullptr;
-
// The set of processes in which this script has been used.
EnumSet<ProcessType> mProcessTypes{};
// The set of processes which the script was loaded into during the
// last session, as read from the cache file.
EnumSet<ProcessType> mOriginalProcessTypes{};
// The read-only XDR data for this script, which was either read from an
@@ -329,25 +312,41 @@ private:
template <ScriptStatus status>
static Matcher<CachedScript*>* Match()
{
static CachedScript::StatusMatcher matcher{status};
return &matcher;
}
- // There's a trade-off between the time it takes to setup an off-thread
- // decode and the time we save by doing the decode off-thread. At this
- // point, the setup is quite expensive, and 20K is about where we start to
- // see an improvement rather than a regression.
+ // There's a significant setup cost for each off-thread decode operation,
+ // so scripts are decoded in chunks to minimize the overhead. There's a
+ // careful balancing act in choosing the size of chunks, to minimize the
+ // number of decode operations, while also minimizing the number of buffer
+ // underruns that require the main thread to wait for a script to finish
+ // decoding.
//
- // This also means that we get much better performance loading one big
- // script than several small scripts, since the setup is per-script, and the
- // OMT compile is almost always complete by the time we need a given script.
- static constexpr int MIN_OFFTHREAD_SIZE = 20 * 1024;
+ // For the first chunk, we don't have much time between the start of the
+ // decode operation and the time the first script is needed, so that chunk
+ // needs to be fairly small. After the first chunk is finished, we have
+ // some buffered scripts to fall back on, and a lot more breathing room,
+ // so the chunks can be a bit bigger, but still not too big.
+ static constexpr int OFF_THREAD_FIRST_CHUNK_SIZE = 128 * 1024;
+ static constexpr int OFF_THREAD_CHUNK_SIZE = 512 * 1024;
+
+ // Ideally, we want every chunk to be smaller than the chunk sizes
+ // specified above. However, if we have some number of small scripts
+ // followed by a huge script that would put us over the normal chunk size,
+ // we're better off processing them as a single chunk.
+ //
+ // In order to guarantee that the JS engine will process a chunk
+ // off-thread, it needs to be at least 100K (which is an implementation
+ // detail that can change at any time), so make sure that we always hit at
+ // least that size, with a bit of breathing room to be safe.
+ static constexpr int MIN_OFF_THREAD_DECODE_SIZE = 128 * 1024;
// The maximum size of scripts to re-decode on the main thread if off-thread
// decoding hasn't finished yet. In practice, we don't hit this very often,
// but when we do, re-decoding some smaller scripts on the main thread gives
// the background decoding a chance to catch up without blocking the main
// thread for quite as long.
static constexpr int MAX_MAINTHREAD_DECODE_SIZE = 50 * 1024;
@@ -372,21 +371,21 @@ private:
// current profile.
Result<nsCOMPtr<nsIFile>, nsresult>
GetCacheFile(const nsAString& suffix);
// Waits for the given cached script to finish compiling off-thread, or
// decodes it synchronously on the main thread, as appropriate.
JSScript* WaitForCachedScript(JSContext* cx, CachedScript* script);
- // Begins decoding the given script in a background thread.
- void DecodeScriptOffThread(JSContext* cx, CachedScript* script);
+ void DecodeNextBatch(size_t chunkSize = OFF_THREAD_CHUNK_SIZE);
static void OffThreadDecodeCallback(void* token, void* context);
- void CancelOffThreadParse(void* token);
+ void FinishOffThreadDecode();
+ void DoFinishOffThreadDecode();
size_t ShallowHeapSizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf)
{
return (mallocSizeOf(this) + mScripts.ShallowSizeOfExcludingThis(mallocSizeOf) +
mallocSizeOf(mSaveThread.get()) + mallocSizeOf(mProfD.get()));
}
using ScriptHash = nsClassHashtable<nsCStringHashKey, CachedScript>;
@@ -406,16 +405,32 @@ private:
// True after we've shown the first window, and are no longer adding new
// scripts to the cache.
bool mStartupFinished = false;
bool mCacheInitialized = false;
bool mSaveComplete = false;
bool mDataPrepared = false;
+ // The list of scripts that we read from the initial startup cache file,
+ // but have yet to initiate a decode task for.
+ LinkedList<CachedScript> mPendingScripts;
+
+ // The lists of scripts and their sources that are currently being decoded
+ // off-thread.
+ JS::TranscodeSources mParsingSources;
+ Vector<CachedScript*> mParsingScripts;
+
+ // The token for the completed off-thread decode task.
+ void* mToken = nullptr;
+
+ // True if a runnable has been dispatched to the main thread to finish an
+ // off-thread decode operation.
+ bool mFinishDecodeRunnablePending = false;
+
// The process type of the current process.
static ProcessType sProcessType;
// The process types for which remote processes have been initialized, and
// are expected to send back script data.
EnumSet<ProcessType> mInitializedProcesses{};
RefPtr<ScriptPreloader> mChildCache;