Bug 1319007 - Add a watchdog to monitor long-running tasks in TaskQueue.
MozReview-Commit-ID: 3CetAp30sCp
--- a/testing/profiles/prefs_general.js
+++ b/testing/profiles/prefs_general.js
@@ -36,16 +36,17 @@ user_pref("gfx.color_management.force_sr
user_pref("gfx.logging.level", 1);
user_pref("network.manage-offline-status", false);
// Disable speculative connections so they aren't reported as leaking when they're hanging around.
user_pref("network.http.speculative-parallel-limit", 0);
user_pref("dom.min_background_timeout_value", 1000);
user_pref("test.mousescroll", true);
user_pref("security.default_personal_cert", "Select Automatically"); // Need to client auth test be w/o any dialogs
user_pref("network.http.prompt-temp-redirect", false);
+user_pref("taskqueue.watchdog.timeout", 30); // seconds; a value <= 0 leaves the TaskQueue watchdog disabled
user_pref("media.preload.default", 2); // default = metadata
user_pref("media.preload.auto", 3); // auto = enough
user_pref("media.cache_size", 1000);
user_pref("media.volume_scale", "0.01");
user_pref("media.test.dumpDebugInfo", true);
user_pref("media.dormant-on-pause-timeout-ms", 0); // Enter dormant immediately without waiting for timeout.
user_pref("security.warn_viewing_mixed", false);
user_pref("app.update.enabled", false);
--- a/xpcom/build/XPCOMInit.cpp
+++ b/xpcom/build/XPCOMInit.cpp
@@ -5,16 +5,17 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "base/basictypes.h"
#include "mozilla/AbstractThread.h"
#include "mozilla/Atomics.h"
#include "mozilla/Poison.h"
#include "mozilla/SharedThreadPool.h"
+#include "mozilla/TaskQueue.h"
#include "mozilla/XPCOM.h"
#include "nsXULAppAPI.h"
#include "nsXPCOMPrivate.h"
#include "nsXPCOMCIDInternal.h"
#include "mozilla/layers/ImageBridgeChild.h"
#include "mozilla/layers/CompositorBridgeParent.h"
@@ -691,17 +692,17 @@ NS_InitXPCOM2(nsIServiceManager** aResul
#endif
// Initialize the JS engine.
const char* jsInitFailureReason = JS_InitWithFailureDiagnostic();
if (jsInitFailureReason) {
NS_RUNTIMEABORT(jsInitFailureReason);
}
sInitializedJS = true;
-
+
// Init AbstractThread.
AbstractThread::InitStatics();
rv = nsComponentManagerImpl::gComponentManager->Init();
if (NS_FAILED(rv)) {
NS_RELEASE(nsComponentManagerImpl::gComponentManager);
return rv;
}
@@ -717,16 +718,18 @@ NS_InitXPCOM2(nsIServiceManager** aResul
// After autoreg, but before we actually instantiate any components,
// add any services listed in the "xpcom-directory-providers" category
// to the directory service.
nsDirectoryService::gService->RegisterCategoryProviders();
// Init SharedThreadPool (which needs the service manager).
SharedThreadPool::InitStatics();
+ TaskQueue::InitStatics();
+
// Force layout to spin up so that nsContentUtils is available for cx stack
// munging. Note that layout registers a number of static atoms, and also
// seals the static atom table, so NS_RegisterStaticAtom may not be called
// beyond this point.
nsCOMPtr<nsISupports> componentLoader =
do_GetService("@mozilla.org/moz/jsloader;1");
mozilla::scache::StartupCache::GetSingleton();
@@ -804,16 +807,17 @@ NS_InitMinimalXPCOM()
// Global cycle collector initialization.
if (!nsCycleCollector_init()) {
return NS_ERROR_UNEXPECTED;
}
AbstractThread::InitStatics();
SharedThreadPool::InitStatics();
+ TaskQueue::InitStatics();
mozilla::Telemetry::Init();
mozilla::HangMonitor::Startup();
mozilla::BackgroundHangMonitor::Startup();
return NS_OK;
}
//
--- a/xpcom/threads/TaskQueue.cpp
+++ b/xpcom/threads/TaskQueue.cpp
@@ -6,16 +6,140 @@
#include "mozilla/TaskQueue.h"
#include "nsIEventTarget.h"
#include "nsThreadUtils.h"
namespace mozilla {
+/**
+ * Monitor long running tasks and crash deliberately to dump stack traces.
+ *
+ * Each instance arms a repeating timer targeted at the main thread. The
+ * thread running a task records when the task starts (BeginTask) and clears
+ * that record when it finishes (EndTask); the timer callback crashes the
+ * process once a task has been running for more than sTimeout seconds.
+ */
+class TaskQueue::WatchDog
+{
+  // The deleter destroys instances and therefore needs the private destructor.
+  friend struct WatchDogDeleter;
+public:
+  /**
+   * Scoped helper bracketing one task: calls BeginTask() on construction and
+   * EndTask() on destruction. A null watchdog is accepted and turns the guard
+   * into a no-op.
+   */
+  class AutoTaskGuard
+  {
+  public:
+    explicit AutoTaskGuard(WatchDog* aWatchDog)
+      : mWatchDog(aWatchDog)
+    {
+      if (mWatchDog) {
+        mWatchDog->BeginTask();
+      }
+    }
+    ~AutoTaskGuard()
+    {
+      if (mWatchDog) {
+        mWatchDog->EndTask();
+      }
+    }
+    // Not copyable: the destructor of a copy would call EndTask() early and
+    // disarm the watchdog while the task is still running.
+    AutoTaskGuard(const AutoTaskGuard&) = delete;
+    AutoTaskGuard& operator=(const AutoTaskGuard&) = delete;
+  private:
+    WatchDog* const mWatchDog;
+  };
+
+  // Reads the timeout pref into sTimeout. Main thread only.
+  static void InitStatics();
+  // Returns a new watchdog, or nullptr when sTimeout is not positive.
+  static WatchDog* Create();
+
+private:
+  WatchDog();
+  ~WatchDog();
+  // nsITimer trampoline; aClosure is the WatchDog instance.
+  static void Callback(nsITimer* aTimer, void* aClosure);
+  // Crashes when the current task has run for more than sTimeout seconds.
+  void Callback();
+  // Mark the start / end of a task; used by AutoTaskGuard.
+  void BeginTask();
+  void EndTask();
+
+  static int sTimeout; // seconds
+
+  nsCOMPtr<nsITimer> mTimer;
+  // The time when this watchdog is created.
+  // Used to calculate mStartTime.
+  const TimeStamp mRefTime;
+  // The start time of a task.
+  // Denoted as seconds elapsed since mRefTime.
+  // INT32_MAX while no task is running.
+  // Write on the TaskQueue thread and read on the main thread.
+  Atomic<int, ReleaseAcquire> mStartTime;
+};
+
+// Deleter used by TaskQueue::mWatchDog. Destruction is bounced to the main
+// thread because ~WatchDog() asserts that it runs there.
+void
+TaskQueue::WatchDogDeleter::operator()(WatchDog* aToDelete) const
+{
+  // NS_DispatchToMainThread() reports an error when the main thread can no
+  // longer be obtained, instead of leaving us with a null nsIThread to
+  // dereference. On failure the watchdog is deliberately leaked: deleting it
+  // on this thread would violate the main-thread assertion in ~WatchDog().
+  nsresult rv = NS_DispatchToMainThread(NS_NewRunnableFunction([aToDelete] () {
+    delete aToDelete;
+  }));
+  if (NS_FAILED(rv)) {
+    NS_WARNING("Failed to dispatch WatchDog deletion to the main thread");
+  }
+}
+
+// Watchdog timeout in seconds. Assigned by InitStatics(); while it is not
+// positive, Create() returns nullptr and no watchdog is installed.
+int TaskQueue::WatchDog::sTimeout;
+
+// Loads the watchdog timeout (in seconds) from preferences.
+// Asserts that it is called on the main thread.
+void
+TaskQueue::WatchDog::InitStatics()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  // Falls back to 0 when the pref is not set, which keeps the watchdog off.
+  sTimeout = Preferences::GetInt("taskqueue.watchdog.timeout", 0);
+}
+
+// Factory for watchdogs. Returns a new instance when a positive timeout has
+// been configured, or nullptr when the watchdog is disabled.
+TaskQueue::WatchDog*
+TaskQueue::WatchDog::Create()
+{
+  if (sTimeout <= 0) {
+    return nullptr;
+  }
+  return new WatchDog();
+}
+
+// Creates the timer that periodically checks for an overrunning task and
+// targets it at the main thread. The watchdog starts idle (no task running).
+TaskQueue::WatchDog::WatchDog()
+  : mTimer(do_CreateInstance("@mozilla.org/timer;1"))
+  , mRefTime(TimeStamp::NowLoRes())
+  , mStartTime(INT32_MAX)
+{
+  // Instance creation can fail and leave mTimer null. Without a timer there
+  // is nothing to arm, so leave the watchdog inert rather than dereference
+  // a null pointer.
+  if (!mTimer) {
+    NS_WARNING("TaskQueue::WatchDog: failed to create timer");
+    return;
+  }
+  nsCOMPtr<nsIThread> thread;
+  MOZ_ALWAYS_SUCCEEDS(NS_GetMainThread(getter_AddRefs(thread)));
+  mTimer->SetTarget(thread);
+  // Check every 10 seconds (the interval is given in milliseconds).
+  mTimer->InitWithFuncCallback(
+    Callback, this, 10000, nsITimer::TYPE_REPEATING_SLACK);
+}
+
+// Stops the timer. Asserts that it runs on the main thread, which is also
+// the timer's target thread.
+TaskQueue::WatchDog::~WatchDog()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  // mTimer is null when timer creation failed in the constructor.
+  if (mTimer) {
+    mTimer->Cancel();
+  }
+}
+
+// Records the start of a task as whole seconds elapsed since mRefTime.
+// Invoked through AutoTaskGuard right before a task runs.
+void
+TaskQueue::WatchDog::BeginTask()
+{
+  const auto sinceRef = TimeStamp::NowLoRes() - mRefTime;
+  // NOTE(review): ToSeconds() is expected to return a floating-point value;
+  // the conversion to int drops any fractional part, as the implicit
+  // conversion did before.
+  mStartTime = static_cast<int>(sinceRef.ToSeconds());
+}
+
+// Marks the watchdog as idle once a task has finished.
+void
+TaskQueue::WatchDog::EndTask()
+{
+  // INT32_MAX is the "no task running" sentinel: it makes the elapsed time
+  // computed in Callback() negative, so the timeout check cannot fire.
+  mStartTime = INT32_MAX;
+}
+
+// Timer tick. Crashes the process, after reporting the elapsed time, when
+// the task currently running has exceeded sTimeout seconds.
+void
+TaskQueue::WatchDog::Callback()
+{
+  const int nowSeconds = (TimeStamp::NowLoRes() - mRefTime).ToSeconds();
+  // mStartTime is INT32_MAX while no task is running, which makes the
+  // elapsed time negative and keeps the check below from firing.
+  const int elapsed = nowSeconds - mStartTime;
+  if (elapsed <= sTimeout) {
+    return;
+  }
+
+  char message[1024];
+  SprintfLiteral(message, "Watchdog timed out! dif=%d, sTimeout=%d", elapsed, sTimeout);
+  MOZ_ReportAssertionFailure(message, __FILE__, __LINE__);
+  MOZ_CRASH();
+}
+
+// nsITimer entry point: aClosure is the WatchDog that armed the timer (see
+// the constructor); forward to its member Callback(). aTimer is unused.
+void
+TaskQueue::WatchDog::Callback(nsITimer* aTimer, void* aClosure)
+{
+  WatchDog* self = static_cast<WatchDog*>(aClosure);
+  self->Callback();
+}
+
class TaskQueue::EventTargetWrapper final : public nsIEventTarget
{
RefPtr<TaskQueue> mTaskQueue;
~EventTargetWrapper()
{
}
@@ -56,24 +180,32 @@ public:
return NS_OK;
}
NS_DECL_THREADSAFE_ISUPPORTS
};
NS_IMPL_ISUPPORTS(TaskQueue::EventTargetWrapper, nsIEventTarget)
+// One-time static initialization for TaskQueue; currently this only
+// initializes the watchdog's statics. Asserts that it runs on the main thread.
+void
+TaskQueue::InitStatics()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  WatchDog::InitStatics();
+}
+
TaskQueue::TaskQueue(already_AddRefed<nsIEventTarget> aTarget,
bool aRequireTailDispatch)
: AbstractThread(aRequireTailDispatch)
, mTarget(aTarget)
, mQueueMonitor("TaskQueue::Queue")
, mTailDispatcher(nullptr)
, mIsRunning(false)
, mIsShutdown(false)
+ , mWatchDog(WatchDog::Create())
{
MOZ_COUNT_CTOR(TaskQueue);
}
TaskQueue::~TaskQueue()
{
MonitorAutoLock mon(mQueueMonitor);
MOZ_ASSERT(mIsShutdown);
@@ -83,16 +215,27 @@ TaskQueue::~TaskQueue()
TaskDispatcher&
TaskQueue::TailDispatcher()
{
MOZ_ASSERT(IsCurrentThreadIn());
MOZ_ASSERT(mTailDispatcher);
return *mTailDispatcher;
}
+// Completes shutdown once it has been requested and mIsRunning is false:
+// resolves the shutdown promise if one exists and drops the event target
+// and the watchdog. mQueueMonitor must be held by the caller.
+void
+TaskQueue::MaybeResolveShutdown()
+{
+  mQueueMonitor.AssertCurrentThreadOwns();
+  if (!mIsShutdown || mIsRunning) {
+    return;
+  }
+  mShutdownPromise.ResolveIfExists(true, __func__);
+  mTarget = nullptr;
+  mWatchDog = nullptr;
+}
+
+
// Note aRunnable is passed by ref to support conditional ownership transfer.
// See Dispatch() in TaskQueue.h for more details.
nsresult
TaskQueue::DispatchLocked(nsCOMPtr<nsIRunnable>& aRunnable,
DispatchFailureHandling aFailureHandling,
DispatchReason aReason)
{
mQueueMonitor.AssertCurrentThreadOwns();
@@ -224,16 +367,17 @@ TaskQueue::Runner::Run()
// Note that dropping the queue monitor before running the task, and
// taking the monitor again after the task has run ensures we have memory
// fences enforced. This means that if the object we're calling wasn't
// designed to be threadsafe, it will be, provided we're only calling it
// in this task queue.
{
AutoTaskGuard g(mQueue);
+ WatchDog::AutoTaskGuard w(mQueue->mWatchDog.get());
event->Run();
}
// Drop the reference to event. The event will hold a reference to the
// object it's calling, and we don't want to keep it alive, it may be
// making assumptions what holds references to it. This is especially
// the case if the object is waiting for us to shutdown, so that it
// can shutdown (like in the MediaDecoderStateMachine's SHUTDOWN case).
--- a/xpcom/threads/TaskQueue.h
+++ b/xpcom/threads/TaskQueue.h
@@ -42,19 +42,27 @@ typedef MozPromise<bool, bool, false> Sh
// the main thread. It also ensures that TQ2 and TQ3 only have a single runnable
// in TQ1 at any time.
//
// This arrangement lets you prioritize work by dispatching runnables directly
// to TQ1. You can issue many runnables for important work. Meanwhile the TQ2
// and TQ3 work will always execute at most one runnable and then yield.
class TaskQueue : public AbstractThread
{
+ class WatchDog;
class EventTargetWrapper;
+  // Custom deleter for mWatchDog. operator() is only declared here; WatchDog
+  // is merely forward-declared in this header.
+  struct WatchDogDeleter
+  {
+    void operator()(WatchDog* aToDelete) const;
+  };
+
public:
+ static void InitStatics();
+
explicit TaskQueue(already_AddRefed<nsIEventTarget> aTarget,
bool aSupportsTailDispatch = false);
TaskDispatcher& TailDispatcher() override;
TaskQueue* AsTaskQueue() override { return this; }
void Dispatch(already_AddRefed<nsIRunnable> aRunnable,
@@ -108,24 +116,17 @@ protected:
// that need to wait until the task queue is idle.
// mQueueMonitor must be held.
void AwaitIdleLocked();
nsresult DispatchLocked(nsCOMPtr<nsIRunnable>& aRunnable,
DispatchFailureHandling aFailureHandling,
DispatchReason aReason = NormalDispatch);
- void MaybeResolveShutdown()
- {
- mQueueMonitor.AssertCurrentThreadOwns();
- if (mIsShutdown && !mIsRunning) {
- mShutdownPromise.ResolveIfExists(true, __func__);
- mTarget = nullptr;
- }
- }
+ void MaybeResolveShutdown();
nsCOMPtr<nsIEventTarget> mTarget;
// Monitor that protects the queue and mIsRunning;
Monitor mQueueMonitor;
// Queue of tasks to run.
std::queue<nsCOMPtr<nsIRunnable>> mTasks;
@@ -180,16 +181,17 @@ protected:
// True if we've dispatched an event to the target to execute events from
// the queue.
bool mIsRunning;
// True if we've started our shutdown process.
bool mIsShutdown;
MozPromiseHolder<ShutdownPromise> mShutdownPromise;
+  // Watchdog for tasks run by this queue; null when WatchDog::Create()
+  // returns nullptr. Reset in MaybeResolveShutdown().
+  UniquePtr<WatchDog, WatchDogDeleter> mWatchDog;
class Runner : public Runnable {
public:
explicit Runner(TaskQueue* aQueue)
: mQueue(aQueue)
{
}
NS_IMETHOD Run() override;