Bug 1331696 - P3. Remove direct audio support from speech synth. r?eeejay draft
authorJean-Yves Avenard <jyavenard@mozilla.com>
Sat, 02 Dec 2017 09:13:35 +0100
changeset 708921 7f8295dfdf30fdcce4a9003d1b97955b1ab3efc6
parent 708357 9a3ed521a2d8f0c7bb52a06ff99f5813af4d333c
child 743276 7c2a1a182e479ded66791bbb82d7e32d10f4738c
push id92483
push userbmo:jyavenard@mozilla.com
push dateThu, 07 Dec 2017 10:18:21 +0000
reviewerseeejay
bugs1331696
milestone59.0a1
Bug 1331696 - P3. Remove direct audio support from speech synth. r?eeejay MozReview-Commit-ID: E1UKOOvwUOe
dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
dom/media/webspeech/synth/nsISpeechService.idl
dom/media/webspeech/synth/nsSpeechTask.cpp
dom/media/webspeech/synth/nsSpeechTask.h
dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
dom/media/webspeech/synth/nsSynthVoiceRegistry.h
dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
dom/media/webspeech/synth/test/nsFakeSynthServices.h
dom/media/webspeech/synth/windows/SapiService.cpp
toolkit/components/narrate/test/browser_narrate.js
toolkit/components/narrate/test/browser_narrate_language.js
toolkit/components/narrate/test/browser_voiceselect.js
toolkit/components/narrate/test/browser_word_highlight.js
--- a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@@ -434,37 +434,30 @@ OSXSpeechSynthesizerService::Speak(const
       offsets.AppendElements(strlen(DLIM_ESCAPE_END));
     } else {
       escapedText.Append(aText[i]);
       offsets.AppendElement(i);
     }
   }
 
   RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
-  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  nsresult rv = aTask->Setup(callback);
   NS_ENSURE_SUCCESS(rv, rv);
 
   NSString* text = nsCocoaUtils::ToNSString(escapedText);
   BOOL success = [synth startSpeakingString:text];
   NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);
 
   aTask->DispatchStart();
   return NS_OK;
 
   NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
 }
 
 NS_IMETHODIMP
-OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
-NS_IMETHODIMP
 OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                      const char16_t* aData)
 {
   return NS_OK;
 }
 
 OSXSpeechSynthesizerService*
 OSXSpeechSynthesizerService::GetInstance()
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -161,31 +161,17 @@ SpeechSynthesisRequestChild::RecvOnMark(
 // SpeechTaskChild
 
 SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
   : nsSpeechTask(aUtterance, aIsChrome)
 {
 }
 
 NS_IMETHODIMP
-SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback,
-                       uint32_t aChannels, uint32_t aRate, uint8_t argc)
-{
-  MOZ_CRASH("Should never be called from child");
-}
-
-NS_IMETHODIMP
-SpeechTaskChild::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
-                           JSContext* aCx)
-{
-  MOZ_CRASH("Should never be called from child");
-}
-
-NS_IMETHODIMP
-SpeechTaskChild::SendAudioNative(int16_t* aData, uint32_t aDataLen)
+SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback)
 {
   MOZ_CRASH("Should never be called from child");
 }
 
 void
 SpeechTaskChild::Pause()
 {
   MOZ_ASSERT(mActor);
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -80,23 +80,17 @@ protected:
 
 class SpeechTaskChild : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestChild;
 public:
 
   explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
 
-  NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback,
-                   uint32_t aChannels, uint32_t aRate, uint8_t argc) override;
-
-  NS_IMETHOD SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
-                       JSContext* aCx) override;
-
-  NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override;
+  NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback) override;
 
   void Pause() override;
 
   void Resume() override;
 
   void Cancel() override;
 
   void ForceEnd() override;
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -77,32 +77,32 @@ protected:
 
 class SpeechTaskParent : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestParent;
 public:
   SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome)
     : nsSpeechTask(aVolume, aUtterance, aIsChrome) {}
 
-  nsresult DispatchStartImpl(const nsAString& aUri);
+  nsresult DispatchStartImpl(const nsAString& aUri) override;
 
-  nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) override;
 
-  nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) override;
 
-  nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) override;
 
-  nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) override;
 
   nsresult DispatchBoundaryImpl(const nsAString& aName,
                                 float aElapsedTime, uint32_t aCharIndex,
-                                uint32_t aCharLength, uint8_t argc);
+                                uint32_t aCharLength, uint8_t argc) override;
 
   nsresult DispatchMarkImpl(const nsAString& aName,
-                            float aElapsedTime, uint32_t aCharIndex);
+                            float aElapsedTime, uint32_t aCharIndex) override;
 
 private:
   SpeechSynthesisRequestParent* mActor;
 };
 
 } // namespace dom
 } // namespace mozilla
 
--- a/dom/media/webspeech/synth/nsISpeechService.idl
+++ b/dom/media/webspeech/synth/nsISpeechService.idl
@@ -1,21 +1,17 @@
 /* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsISupports.idl"
 
-typedef unsigned short SpeechServiceType;
-
 /**
- * A callback is implemented by the service. For direct audio services, it is
- * required to implement these, although it could be helpful to use the
- * cancel method for shutting down the speech resources.
+ * A callback is implemented by the service.
  */
 [scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)]
 interface nsISpeechTaskCallback : nsISupports
 {
   /**
    * The user or application has paused the speech.
    */
   void onPause();
@@ -27,53 +23,34 @@ interface nsISpeechTaskCallback : nsISup
 
   /**
    * The user or application has canceled the speech.
    */
   void onCancel();
 
   /**
    * The user or application has changed the volume of this speech.
-   * This is only used on indirect audio service type.
    */
   void onVolumeChanged(in float aVolume);
 };
 
 
 /**
  * A task is associated with a single utterance. It is provided by the browser
  * to the service in the speak() method.
  */
 [scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)]
 interface nsISpeechTask : nsISupports
 {
   /**
    * Prepare browser for speech.
    *
    * @param aCallback callback object for mid-speech operations.
-   * @param aChannels number of audio channels. Only required
-   *                    in direct audio services
-   * @param aRate     audio rate. Only required in direct audio services
    */
-  [optional_argc] void setup(in nsISpeechTaskCallback aCallback,
-                               [optional] in uint32_t aChannels,
-                               [optional] in uint32_t aRate);
-
-  /**
-   * Send audio data to browser.
-   *
-   * @param aData     an Int16Array with PCM-16 audio data.
-   * @param aLandmarks an array of sample offset and landmark pairs.
-   *                     Used for emiting boundary and mark events.
-   */
-  [implicit_jscontext]
-  void sendAudio(in jsval aData, in jsval aLandmarks);
-
-  [noscript]
-  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);
+  void setup(in nsISpeechTaskCallback aCallback);
 
   /**
    * Dispatch start event.
    */
   void dispatchStart();
 
   /**
    * Dispatch end event.
@@ -127,25 +104,21 @@ interface nsISpeechTask : nsISupports
    * @param aCharIndex   offset of spoken characters.
    */
   void dispatchMark(in DOMString aName, in float aElapsedTime, in unsigned long aCharIndex);
 };
 
 /**
  * The main interface of a speech synthesis service.
  *
- * A service's speak method could be implemented in two ways:
- *  1. Indirect audio - the service is responsible for outputting audio.
- *    The service calls the nsISpeechTask.dispatch* methods directly. Starting
- *    with dispatchStart() and ending with dispatchEnd or dispatchError().
- *
- *  2. Direct audio - the service provides us with PCM-16 data, and we output it.
- *    The service does not call the dispatch task methods directly. Instead,
- *    audio information is provided at setup(), and audio data is sent with
- *    sendAudio(). The utterance is terminated with an empty sendAudio().
+ * A service is responsible for outputting audio.
+ * The service dispatches events, starting with dispatchStart() and ending with
+ * dispatchEnd or dispatchError().
+ * A service must also respond with the correct actions and events in response
+ * to implemented callback methods.
  */
 [scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)]
 interface nsISpeechService : nsISupports
 {
   /**
    * Speak the given text using the voice identified byu the given uri. See
    * W3C Speech API spec for information about pitch and rate.
    * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes
@@ -156,20 +129,15 @@ interface nsISpeechService : nsISupports
    * @param aRate   rate to speak voice in.
    * @param aPitch  pitch to speak voice in.
    * @param aTask  task instance for utterance, used for sending events or audio
    *                 data back to browser.
    */
   void speak(in DOMString aText, in DOMString aUri,
              in float aVolume, in float aRate, in float aPitch,
              in nsISpeechTask aTask);
-
-  const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1;
-  const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2;
-
-  readonly attribute SpeechServiceType serviceType;
 };
 
 %{C++
 // This is the service category speech services could use to start up as
 // a component.
 #define NS_SPEECH_SYNTH_STARTED "speech-synth-started"
 %}
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -2,141 +2,30 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioChannelAgent.h"
 #include "AudioChannelService.h"
 #include "AudioSegment.h"
-#include "MediaStreamListener.h"
 #include "nsSpeechTask.h"
 #include "nsSynthVoiceRegistry.h"
 #include "SharedBuffer.h"
 #include "SpeechSynthesis.h"
 
-// GetCurrentTime is defined in winbase.h as zero argument macro forwarding to
-// GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime().
-#ifdef GetCurrentTime
-#undef GetCurrentTime
-#endif
-
 #undef LOG
 extern mozilla::LogModule* GetSpeechSynthLog();
 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
 
 #define AUDIO_TRACK 1
 
 namespace mozilla {
 namespace dom {
 
-class SynthStreamListener : public MediaStreamListener
-{
-public:
-  SynthStreamListener(nsSpeechTask* aSpeechTask,
-                      MediaStream* aStream,
-                      AbstractThread* aMainThread)
-    : mSpeechTask(aSpeechTask)
-    , mStream(aStream)
-    , mStarted(false)
-  {
-  }
-
-  void DoNotifyStarted()
-  {
-    if (mSpeechTask) {
-      mSpeechTask->DispatchStartInner();
-    }
-  }
-
-  void DoNotifyFinished()
-  {
-    if (mSpeechTask) {
-      mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(),
-                                    mSpeechTask->GetCurrentCharOffset());
-    }
-  }
-
-  void NotifyEvent(MediaStreamGraph* aGraph,
-                   MediaStreamGraphEvent event) override
-  {
-    switch (event) {
-      case MediaStreamGraphEvent::EVENT_FINISHED:
-        {
-          RefPtr<SynthStreamListener> self = this;
-          if (!mStarted) {
-            mStarted = true;
-            aGraph->DispatchToMainThreadAfterStreamStateUpdate(
-              NS_NewRunnableFunction(
-                "dom::SynthStreamListener::NotifyEvent",
-                [self] {
-                  // "start" event will be fired in DoNotifyStarted() which is
-                  // not allowed in stable state, so we do it asynchronously in
-                  // next run.
-                  NS_DispatchToMainThread(NewRunnableMethod(
-                    "dom::SynthStreamListener::DoNotifyStarted",
-                    self,
-                    &SynthStreamListener::DoNotifyStarted));
-                }));
-          }
-
-          aGraph->DispatchToMainThreadAfterStreamStateUpdate(
-            NS_NewRunnableFunction(
-              "dom::SynthStreamListener::NotifyEvent",
-              [self] {
-                // "end" event will be fired in DoNotifyFinished() which is
-                // not allowed in stable state, so we do it asynchronously in
-                // next run.
-                NS_DispatchToMainThread(NewRunnableMethod(
-                  "dom::SynthStreamListener::DoNotifyFinished",
-                  self,
-                  &SynthStreamListener::DoNotifyFinished));
-              }));
-        }
-        break;
-      case MediaStreamGraphEvent::EVENT_REMOVED:
-        mSpeechTask = nullptr;
-        // Dereference MediaStream to destroy safety
-        mStream = nullptr;
-        break;
-      default:
-        break;
-    }
-  }
-
-  void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) override
-  {
-    if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) {
-      mStarted = true;
-      RefPtr<SynthStreamListener> self = this;
-      aGraph->DispatchToMainThreadAfterStreamStateUpdate(
-        NS_NewRunnableFunction(
-          "dom::SynthStreamListener::NotifyBlockingChanged",
-          [self] {
-            // "start" event will be fired in DoNotifyStarted() which is
-            // not allowed in stable state, so we do it asynchronously in
-            // next run.
-            NS_DispatchToMainThread(NewRunnableMethod(
-              "dom::SynthStreamListener::DoNotifyStarted",
-              self,
-              &SynthStreamListener::DoNotifyStarted));
-          }));
-    }
-  }
-
-private:
-  // Raw pointer; if we exist, the stream exists,
-  // and 'mSpeechTask' exclusively owns it and therefor exists as well.
-  nsSpeechTask* mSpeechTask;
-  // This is KungFuDeathGrip for MediaStream
-  RefPtr<MediaStream> mStream;
-
-  bool mStarted;
-};
-
 // nsSpeechTask
 
 NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance, mCallback);
 
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
   NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
   NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
@@ -147,268 +36,80 @@ NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeech
 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
 
 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
   : mUtterance(aUtterance)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
-  , mIndirectAudio(false)
   , mIsChrome(aIsChrome)
 {
   mText = aUtterance->mText;
   mVolume = aUtterance->Volume();
 }
 
 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome)
   : mUtterance(nullptr)
   , mVolume(aVolume)
   , mText(aText)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
-  , mIndirectAudio(false)
   , mIsChrome(aIsChrome)
 {
 }
 
 nsSpeechTask::~nsSpeechTask()
 {
   LOG(LogLevel::Debug, ("~nsSpeechTask"));
-  if (mStream) {
-    if (!mStream->IsDestroyed()) {
-      mStream->Destroy();
-    }
-
-    // This will finally destroyed by SynthStreamListener becasue
-    // MediaStream::Destroy() is async.
-    mStream = nullptr;
-  }
-
-  if (mPort) {
-    mPort->Destroy();
-    mPort = nullptr;
-  }
 }
 
 void
-nsSpeechTask::InitDirectAudio()
+nsSpeechTask::Init()
 {
-  // nullptr as final argument here means that this is not tied to a window.
-  // This is a global MSG.
-  mStream = MediaStreamGraph::GetInstance(MediaStreamGraph::AUDIO_THREAD_DRIVER,
-                                          nullptr)->
-    CreateSourceStream();
-  mIndirectAudio = false;
-  mInited = true;
-}
-
-void
-nsSpeechTask::InitIndirectAudio()
-{
-  mIndirectAudio = true;
   mInited = true;
 }
 
 void
 nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri)
 {
   mChosenVoiceURI = aUri;
 }
 
 NS_IMETHODIMP
-nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,
-                    uint32_t aChannels, uint32_t aRate, uint8_t argc)
+nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback)
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
 
   mCallback = aCallback;
 
-  if (mIndirectAudio) {
-    MOZ_ASSERT(!mStream);
-    if (argc > 0) {
-      NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
-    }
-    return NS_OK;
-  }
-
-  // mStream is set up in Init() that should be called before this.
-  MOZ_ASSERT(mStream);
-
-  mStream->AddListener(
-    // Non DocGroup-version of AbstractThread::MainThread for the task in parent.
-    new SynthStreamListener(this, mStream, AbstractThread::MainThread()));
-
-  // XXX: Support more than one channel
-  if(NS_WARN_IF(!(aChannels == 1))) {
-    return NS_ERROR_FAILURE;
-  }
-
-  mChannels = aChannels;
-
-  AudioSegment* segment = new AudioSegment();
-  mStream->AddAudioTrack(AUDIO_TRACK, aRate, 0, segment);
-  mStream->AddAudioOutput(this);
-  mStream->SetAudioOutputVolume(this, mVolume);
-
-  return NS_OK;
-}
-
-static RefPtr<mozilla::SharedBuffer>
-makeSamples(int16_t* aData, uint32_t aDataLen)
-{
-  RefPtr<mozilla::SharedBuffer> samples =
-    SharedBuffer::Create(aDataLen * sizeof(int16_t));
-  int16_t* frames = static_cast<int16_t*>(samples->Data());
-
-  for (uint32_t i = 0; i < aDataLen; i++) {
-    frames[i] = aData[i];
-  }
-
-  return samples;
-}
-
-NS_IMETHODIMP
-nsSpeechTask::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
-                        JSContext* aCx)
-{
-  MOZ_ASSERT(XRE_IsParentProcess());
-
-  if(NS_WARN_IF(!(mStream))) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(mStream->IsDestroyed())) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(!(mChannels))) {
-    return NS_ERROR_FAILURE;
-  }
-  if(NS_WARN_IF(!(aData.isObject()))) {
-    return NS_ERROR_INVALID_ARG;
-  }
-
-  if (mIndirectAudio) {
-    NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
-    return NS_ERROR_FAILURE;
-  }
-
-  JS::Rooted<JSObject*> darray(aCx, &aData.toObject());
-  JSAutoCompartment ac(aCx, darray);
-
-  JS::Rooted<JSObject*> tsrc(aCx, nullptr);
-
-  // Allow either Int16Array or plain JS Array
-  if (JS_IsInt16Array(darray)) {
-    tsrc = darray;
-  } else {
-    bool isArray;
-    if (!JS_IsArrayObject(aCx, darray, &isArray)) {
-      return NS_ERROR_UNEXPECTED;
-    }
-    if (isArray) {
-      tsrc = JS_NewInt16ArrayFromArray(aCx, darray);
-    }
-  }
-
-  if (!tsrc) {
-    return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
-  }
-
-  uint32_t dataLen = JS_GetTypedArrayLength(tsrc);
-  RefPtr<mozilla::SharedBuffer> samples;
-  {
-    JS::AutoCheckCannotGC nogc;
-    bool isShared;
-    int16_t* data = JS_GetInt16ArrayData(tsrc, &isShared, nogc);
-    if (isShared) {
-      // Must opt in to using shared data.
-      return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
-    }
-    samples = makeSamples(data, dataLen);
-  }
-  SendAudioImpl(samples, dataLen);
-
   return NS_OK;
 }
 
 NS_IMETHODIMP
-nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen)
-{
-  MOZ_ASSERT(XRE_IsParentProcess());
-
-  if(NS_WARN_IF(!(mStream))) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(mStream->IsDestroyed())) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(!(mChannels))) {
-    return NS_ERROR_FAILURE;
-  }
-
-  if (mIndirectAudio) {
-    NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
-    return NS_ERROR_FAILURE;
-  }
-
-  RefPtr<mozilla::SharedBuffer> samples = makeSamples(aData, aDataLen);
-  SendAudioImpl(samples, aDataLen);
-
-  return NS_OK;
-}
-
-void
-nsSpeechTask::SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen)
-{
-  if (aDataLen == 0) {
-    mStream->EndAllTrackAndFinish();
-    return;
-  }
-
-  AudioSegment segment;
-  AutoTArray<const int16_t*, 1> channelData;
-  channelData.AppendElement(static_cast<int16_t*>(aSamples->Data()));
-  segment.AppendFrames(aSamples.forget(), channelData, aDataLen,
-                       PRINCIPAL_HANDLE_NONE);
-  mStream->AppendToTrack(1, &segment);
-  mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX);
-}
-
-NS_IMETHODIMP
 nsSpeechTask::DispatchStart()
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchStart() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
-  return DispatchStartInner();
-}
-
-nsresult
-nsSpeechTask::DispatchStartInner()
-{
   nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
   return DispatchStartImpl();
 }
 
 nsresult
 nsSpeechTask::DispatchStartImpl()
 {
   return DispatchStartImpl(mChosenVoiceURI);
 }
 
 nsresult
 nsSpeechTask::DispatchStartImpl(const nsAString& aUri)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStart"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));
 
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING))) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
   CreateAudioChannelAgent();
 
@@ -418,54 +119,38 @@ nsSpeechTask::DispatchStartImpl(const ns
                                            nullptr, 0, EmptyString());
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchEnd() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
-  return DispatchEndInner(aElapsedTime, aCharIndex);
-}
-
-nsresult
-nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex)
-{
   // After we end, no callback functions should go through.
   mCallback = nullptr;
 
   if (!mPreCanceled) {
     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
   }
 
   return DispatchEndImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEnd\n"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));
 
   DestroyAudioChannelAgent();
 
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
-  // XXX: This should not be here, but it prevents a crash in MSG.
-  if (mStream) {
-    mStream->Destroy();
-  }
-
   RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
 
   if (mSpeechSynthesis) {
     mSpeechSynthesis->OnEnd(this);
   }
 
   if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) {
     utterance->mState = SpeechSynthesisUtterance::STATE_NONE;
@@ -477,28 +162,23 @@ nsSpeechTask::DispatchEndImpl(float aEla
   }
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchPause() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
   return DispatchPauseImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPause"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(mUtterance->mPaused)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
@@ -509,28 +189,23 @@ nsSpeechTask::DispatchPauseImpl(float aE
                                              EmptyString());
   }
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchResume() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
   return DispatchResumeImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResume"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
   MOZ_ASSERT(mUtterance);
   if(NS_WARN_IF(!(mUtterance->mPaused))) {
     return NS_ERROR_NOT_AVAILABLE;
   }
   if(NS_WARN_IF(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED)) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
@@ -542,35 +217,24 @@ nsSpeechTask::DispatchResumeImpl(float a
   }
 
   return NS_OK;
 }
 
 void
 nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex)
 {
-  DispatchErrorInner(aElapsedTime, aCharIndex);
+  DispatchError(aElapsedTime, aCharIndex);
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
 {
   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError"));
 
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchError() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
-  return DispatchErrorInner(aElapsedTime, aCharIndex);
-}
-
-nsresult
-nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex)
-{
   if (!mPreCanceled) {
     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
   }
 
   return DispatchErrorImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
@@ -592,21 +256,16 @@ nsSpeechTask::DispatchErrorImpl(float aE
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchBoundary(const nsAString& aName,
                                float aElapsedTime, uint32_t aCharIndex,
                                uint32_t aCharLength, uint8_t argc)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
   return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength, argc);
 }
 
 nsresult
 nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
                                    float aElapsedTime, uint32_t aCharIndex,
                                    uint32_t aCharLength, uint8_t argc)
 {
@@ -621,21 +280,16 @@ nsSpeechTask::DispatchBoundaryImpl(const
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsSpeechTask::DispatchMark(const nsAString& aName,
                            float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchMark() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
   return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
 }
 
 nsresult
 nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
                                float aElapsedTime, uint32_t aCharIndex)
 {
   MOZ_ASSERT(mUtterance);
@@ -654,104 +308,64 @@ nsSpeechTask::Pause()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnPause();
     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
   }
 
-  if (mStream) {
-    mStream->Suspend();
-  }
-
   if (!mInited) {
     mPrePaused = true;
   }
-
-  if (!mIndirectAudio) {
-    DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset());
-  }
 }
 
 void
 nsSpeechTask::Resume()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnResume();
     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
                          "Unable to call onResume() callback");
   }
 
-  if (mStream) {
-    mStream->Resume();
-  }
-
   if (mPrePaused) {
     mPrePaused = false;
     nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
   }
-
-  if (!mIndirectAudio) {
-    DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset());
-  }
 }
 
 void
 nsSpeechTask::Cancel()
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
   LOG(LogLevel::Debug, ("nsSpeechTask::Cancel"));
 
   if (mCallback) {
     DebugOnly<nsresult> rv = mCallback->OnCancel();
     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
                          "Unable to call onCancel() callback");
   }
 
-  if (mStream) {
-    mStream->Suspend();
-  }
-
   if (!mInited) {
     mPreCanceled = true;
   }
-
-  if (!mIndirectAudio) {
-    DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
-  }
 }
 
 void
 nsSpeechTask::ForceEnd()
 {
-  if (mStream) {
-    mStream->Suspend();
-  }
-
   if (!mInited) {
     mPreCanceled = true;
   }
 
-  DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
-}
-
-float
-nsSpeechTask::GetCurrentTime()
-{
-  return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0;
-}
-
-uint32_t
-nsSpeechTask::GetCurrentCharOffset()
-{
-  return mStream && mStream->IsFinished() ? mText.Length() : 0;
+  DispatchEnd(0, 0);
 }
 
 void
 nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis)
 {
   mSpeechSynthesis = aSpeechSynthesis;
 }
 
@@ -818,18 +432,15 @@ nsSpeechTask::WindowAudioCaptureChanged(
 {
   // This is not supported yet.
   return NS_OK;
 }
 
 void
 nsSpeechTask::SetAudioOutputVolume(float aVolume)
 {
-  if (mStream && !mStream->IsDestroyed()) {
-    mStream->SetAudioOutputVolume(this, aVolume);
-  }
-  if (mIndirectAudio && mCallback) {
+  if (mCallback) {
     mCallback->OnVolumeChanged(aVolume);
   }
 }
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -2,17 +2,16 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef mozilla_dom_nsSpeechTask_h
 #define mozilla_dom_nsSpeechTask_h
 
-#include "MediaStreamGraph.h"
 #include "SpeechSynthesisUtterance.h"
 #include "nsIAudioChannelAgent.h"
 #include "nsISpeechService.h"
 
 namespace mozilla {
 
 class SharedBuffer;
 
@@ -41,45 +40,31 @@ public:
   virtual void Pause();
 
   virtual void Resume();
 
   virtual void Cancel();
 
   virtual void ForceEnd();
 
-  float GetCurrentTime();
-
-  uint32_t GetCurrentCharOffset();
-
   void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis);
 
-  void InitDirectAudio();
-  void InitIndirectAudio();
+  void Init();
 
   void SetChosenVoiceURI(const nsAString& aUri);
 
   virtual void SetAudioOutputVolume(float aVolume);
 
   void ForceError(float aElapsedTime, uint32_t aCharIndex);
 
-  bool IsPreCanceled()
-  {
-    return mPreCanceled;
-  };
+  bool IsPreCanceled() { return mPreCanceled; }
 
-  bool IsPrePaused()
-  {
-    return mPrePaused;
-  }
+  bool IsPrePaused() { return mPrePaused; }
 
-  bool IsChrome()
-  {
-    return mIsChrome;
-  }
+  bool IsChrome() { return mIsChrome; }
 
 protected:
   virtual ~nsSpeechTask();
 
   nsresult DispatchStartImpl();
 
   virtual nsresult DispatchStartImpl(const nsAString& aUri);
 
@@ -110,41 +95,26 @@ protected:
 
   bool mPrePaused;
 
   bool mPreCanceled;
 
 private:
   void End();
 
-  void SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
-
-  nsresult DispatchStartInner();
-
-  nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex);
-  nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
-
   void CreateAudioChannelAgent();
 
   void DestroyAudioChannelAgent();
 
-  RefPtr<SourceMediaStream> mStream;
-
-  RefPtr<MediaInputPort> mPort;
-
   nsCOMPtr<nsISpeechTaskCallback> mCallback;
 
   nsCOMPtr<nsIAudioChannelAgent> mAudioChannelAgent;
 
-  uint32_t mChannels;
-
   RefPtr<SpeechSynthesis> mSpeechSynthesis;
 
-  bool mIndirectAudio;
-
   nsString mChosenVoiceURI;
 
   bool mIsChrome;
 };
 
 } // namespace dom
 } // namespace mozilla
 
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -819,30 +819,18 @@ nsSynthVoiceRegistry::SpeakImpl(VoiceDat
                                 const float& aRate,
                                 const float& aPitch)
 {
   LOG(LogLevel::Debug,
       ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f pitch=%f",
        NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(),
        aRate, aPitch));
 
-  SpeechServiceType serviceType;
-
-  DebugOnly<nsresult> rv = aVoice->mService->GetServiceType(&serviceType);
-  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to get speech service type");
-
-  if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
-    aTask->InitIndirectAudio();
-  } else {
-    aTask->InitDirectAudio();
-  }
+  aTask->Init();
 
   if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate,
                                         aPitch, aTask))) {
-    if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
-      aTask->DispatchError(0, 0);
-    }
-    // XXX When using direct audio, no way to dispatch error
+    aTask->DispatchError(0, 0);
   }
 }
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
@@ -5,17 +5,16 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef mozilla_dom_nsSynthVoiceRegistry_h
 #define mozilla_dom_nsSynthVoiceRegistry_h
 
 #include "nsISynthVoiceRegistry.h"
 #include "nsRefPtrHashtable.h"
 #include "nsTArray.h"
-#include "MediaStreamGraph.h"
 
 class nsISpeechService;
 
 namespace mozilla {
 namespace dom {
 
 class RemoteVoice;
 class SpeechSynthesisUtterance;
--- a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
+++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
@@ -508,18 +508,17 @@ SpeechDispatcherService::Speak(const nsA
   }
 
   spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate));
 
   // We provide a pitch of 0 to 2 with 1 being the default.
   // speech-dispatcher expects -100 to 100 with 0 being default.
   spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100));
 
-  // The last three parameters don't matter for an indirect service
-  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  nsresult rv = aTask->Setup(callback);
 
   if (NS_FAILED(rv)) {
     return rv;
   }
 
   if (aText.Length()) {
     int msg_id = spd_say(
       mSpeechdClient, SPD_MESSAGE, NS_ConvertUTF16toUTF8(aText).get());
@@ -544,23 +543,16 @@ SpeechDispatcherService::Speak(const nsA
       callback,
       &SpeechDispatcherCallback::OnSpeechEvent,
       SPD_EVENT_END));
   }
 
   return NS_OK;
 }
 
-NS_IMETHODIMP
-SpeechDispatcherService::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
 SpeechDispatcherService*
 SpeechDispatcherService::GetInstance(bool create)
 {
   if (XRE_GetProcessType() != GeckoProcessType_Default) {
     MOZ_ASSERT(false,
                "SpeechDispatcherService can only be started on main gecko process");
     return nullptr;
   }
--- a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
@@ -39,30 +39,27 @@ struct VoiceDetails
 {
   const char* uri;
   const char* name;
   const char* lang;
   bool defaultVoice;
   uint32_t flags;
 };
 
-static const VoiceDetails sDirectVoices[] = {
-  {"urn:moz-tts:fake-direct:bob", "Bob Marley", "en-JM", true, 0},
-  {"urn:moz-tts:fake-direct:amy", "Amy Winehouse", "en-GB", false, 0},
-  {"urn:moz-tts:fake-direct:lenny", "Leonard Cohen", "en-CA", false, 0},
-  {"urn:moz-tts:fake-direct:celine", "Celine Dion", "fr-CA", false, 0},
-  {"urn:moz-tts:fake-direct:julie", "Julieta Venegas", "es-MX", false, },
-};
-
-static const VoiceDetails sIndirectVoices[] = {
-  {"urn:moz-tts:fake-indirect:zanetta", "Zanetta Farussi", "it-IT", false, 0},
-  {"urn:moz-tts:fake-indirect:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
-  {"urn:moz-tts:fake-indirect:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
-  {"urn:moz-tts:fake-indirect:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
-  {"urn:moz-tts:fake-indirect:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
+static const VoiceDetails sVoices[] = {
+  {"urn:moz-tts:fake:bob", "Bob Marley", "en-JM", true, 0},
+  {"urn:moz-tts:fake:amy", "Amy Winehouse", "en-GB", false, 0},
+  {"urn:moz-tts:fake:lenny", "Leonard Cohen", "en-CA", false, 0},
+  {"urn:moz-tts:fake:celine", "Celine Dion", "fr-CA", false, 0},
+  {"urn:moz-tts:fake:julie", "Julieta Venegas", "es-MX", false, 0},
+  {"urn:moz-tts:fake:zanetta", "Zanetta Farussi", "it-IT", false, 0},
+  {"urn:moz-tts:fake:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
+  {"urn:moz-tts:fake:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
+  {"urn:moz-tts:fake:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
+  {"urn:moz-tts:fake:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
 };
 
 // FakeSynthCallback
 class FakeSynthCallback : public nsISpeechTaskCallback
 {
 public:
   explicit FakeSynthCallback(nsISpeechTask* aTask) : mTask(aTask) { }
   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
@@ -96,115 +93,50 @@ public:
   }
 
   NS_IMETHOD OnVolumeChanged(float aVolume) override
   {
     return NS_OK;
   }
 
 private:
-  virtual ~FakeSynthCallback() { }
+  virtual ~FakeSynthCallback() = default;
 
   nsCOMPtr<nsISpeechTask> mTask;
 };
 
 NS_IMPL_CYCLE_COLLECTION(FakeSynthCallback, mTask);
 
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(FakeSynthCallback)
   NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_CYCLE_COLLECTING_ADDREF(FakeSynthCallback)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(FakeSynthCallback)
 
-// FakeDirectAudioSynth
+// FakeSpeechSynth
 
-class FakeDirectAudioSynth : public nsISpeechService
+class FakeSpeechSynth : public nsISpeechService
 {
 
 public:
-  FakeDirectAudioSynth() { }
+  FakeSpeechSynth() = default;
 
   NS_DECL_ISUPPORTS
   NS_DECL_NSISPEECHSERVICE
 
 private:
-  virtual ~FakeDirectAudioSynth() { }
+  virtual ~FakeSpeechSynth() = default;
 };
 
-NS_IMPL_ISUPPORTS(FakeDirectAudioSynth, nsISpeechService)
+NS_IMPL_ISUPPORTS(FakeSpeechSynth, nsISpeechService)
 
 NS_IMETHODIMP
-FakeDirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
-                            float aVolume, float aRate, float aPitch,
-                            nsISpeechTask* aTask)
-{
-  class Runnable final : public mozilla::Runnable
-  {
-  public:
-    Runnable(nsISpeechTask* aTask, const nsAString& aText)
-      : mozilla::Runnable("Runnable")
-      , mTask(aTask)
-      , mText(aText)
-    {
-    }
-
-    NS_IMETHOD Run() override
-    {
-      RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(nullptr);
-      mTask->Setup(cb, CHANNELS, SAMPLERATE, 2);
-
-      // Just an arbitrary multiplier. Pretend that each character is
-      // synthesized to 40 frames.
-      uint32_t frames_length = 40 * mText.Length();
-      auto frames = MakeUnique<int16_t[]>(frames_length);
-      mTask->SendAudioNative(frames.get(), frames_length);
-
-      mTask->SendAudioNative(nullptr, 0);
-
-      return NS_OK;
-    }
-
-  private:
-    nsCOMPtr<nsISpeechTask> mTask;
-    nsString mText;
-  };
-
-  nsCOMPtr<nsIRunnable> runnable = new Runnable(aTask, aText);
-  NS_DispatchToMainThread(runnable);
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-FakeDirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO;
-  return NS_OK;
-}
-
-// FakeDirectAudioSynth
-
-class FakeIndirectAudioSynth : public nsISpeechService
-{
-
-public:
-  FakeIndirectAudioSynth() {}
-
-  NS_DECL_ISUPPORTS
-  NS_DECL_NSISPEECHSERVICE
-
-private:
-  virtual ~FakeIndirectAudioSynth() { }
-};
-
-NS_IMPL_ISUPPORTS(FakeIndirectAudioSynth, nsISpeechService)
-
-NS_IMETHODIMP
-FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
-                              float aVolume, float aRate, float aPitch,
-                              nsISpeechTask* aTask)
+FakeSpeechSynth::Speak(const nsAString& aText, const nsAString& aUri,
+                       float aVolume, float aRate, float aPitch,
+                       nsISpeechTask* aTask)
 {
   class DispatchStart final : public Runnable
   {
   public:
     explicit DispatchStart(nsISpeechTask* aTask)
       : mozilla::Runnable("DispatchStart")
@@ -263,70 +195,56 @@ FakeIndirectAudioSynth::Speak(const nsAS
     }
 
   private:
     nsCOMPtr<nsISpeechTask> mTask;
     nsString mText;
   };
 
   uint32_t flags = 0;
-  for (uint32_t i = 0; i < ArrayLength(sIndirectVoices); i++) {
-    if (aUri.EqualsASCII(sIndirectVoices[i].uri)) {
-      flags = sIndirectVoices[i].flags;
+  for (const VoiceDetails& voice : sVoices) {
+    if (aUri.EqualsASCII(voice.uri)) {
+      flags = voice.flags;
+      break;
     }
   }
 
   if (flags & eFailAtStart) {
     return NS_ERROR_FAILURE;
   }
 
   RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(
     (flags & eSuppressEvents) ? nullptr : aTask);
 
-  aTask->Setup(cb, 0, 0, 0);
+  aTask->Setup(cb);
 
   nsCOMPtr<nsIRunnable> runnable = new DispatchStart(aTask);
   NS_DispatchToMainThread(runnable);
 
   if (flags & eFail) {
     runnable = new DispatchError(aTask, aText);
     NS_DispatchToMainThread(runnable);
   } else if ((flags & eSuppressEnd) == 0) {
     runnable = new DispatchEnd(aTask, aText);
     NS_DispatchToMainThread(runnable);
   }
 
   return NS_OK;
 }
 
-NS_IMETHODIMP
-FakeIndirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
 // nsFakeSynthService
 
 NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices)
   NS_INTERFACE_MAP_ENTRY(nsIObserver)
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_ADDREF(nsFakeSynthServices)
 NS_IMPL_RELEASE(nsFakeSynthServices)
 
-nsFakeSynthServices::nsFakeSynthServices()
-{
-}
-
-nsFakeSynthServices::~nsFakeSynthServices()
-{
-}
-
 static void
 AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLength)
 {
   RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
   for (uint32_t i = 0; i < aLength; i++) {
     NS_ConvertUTF8toUTF16 name(aVoices[i].name);
     NS_ConvertUTF8toUTF16 uri(aVoices[i].uri);
     NS_ConvertUTF8toUTF16 lang(aVoices[i].lang);
@@ -339,21 +257,18 @@ AddVoices(nsISpeechService* aService, co
   }
 
   registry->NotifyVoicesChanged();
 }
 
 void
 nsFakeSynthServices::Init()
 {
-  mDirectService = new FakeDirectAudioSynth();
-  AddVoices(mDirectService, sDirectVoices, ArrayLength(sDirectVoices));
-
-  mIndirectService = new FakeIndirectAudioSynth();
-  AddVoices(mIndirectService, sIndirectVoices, ArrayLength(sIndirectVoices));
+  mSynthService = new FakeSpeechSynth();
+  AddVoices(mSynthService, sVoices, ArrayLength(sVoices));
 }
 
 // nsIObserver
 
 NS_IMETHODIMP
 nsFakeSynthServices::Observe(nsISupports* aSubject, const char* aTopic,
                              const char16_t* aData)
 {
--- a/dom/media/webspeech/synth/test/nsFakeSynthServices.h
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h
@@ -20,33 +20,31 @@ namespace dom {
 
 class nsFakeSynthServices : public nsIObserver
 {
 
 public:
   NS_DECL_ISUPPORTS
   NS_DECL_NSIOBSERVER
 
-  nsFakeSynthServices();
+  nsFakeSynthServices() = default;
 
   static nsFakeSynthServices* GetInstance();
 
   static already_AddRefed<nsFakeSynthServices> GetInstanceForService();
 
   static void Shutdown();
 
 private:
 
-  virtual ~nsFakeSynthServices();
+  virtual ~nsFakeSynthServices() = default;
 
   void Init();
 
-  nsCOMPtr<nsISpeechService> mDirectService;
-
-  nsCOMPtr<nsISpeechService> mIndirectService;
+  nsCOMPtr<nsISpeechService> mSynthService;
 
   static StaticRefPtr<nsFakeSynthServices> sSingleton;
 };
 
 } // namespace dom
 } // namespace mozilla
 
 #endif
--- a/dom/media/webspeech/synth/windows/SapiService.cpp
+++ b/dom/media/webspeech/synth/windows/SapiService.cpp
@@ -397,44 +397,37 @@ SapiService::Speak(const nsAString& aTex
   }
 
   xml.AppendLiteral("</pitch>");
 
   RefPtr<SapiCallback> callback =
     new SapiCallback(aTask, spVoice, textOffset, aText.Length());
 
-  // The last three parameters doesn't matter for an indirect service
-  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  // With direct audio gone, Setup only registers the callback.
+  nsresult rv = aTask->Setup(callback);
   if (NS_FAILED(rv)) {
     return rv;
   }
 
   ULONG streamNum;
   if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) {
-    aTask->Setup(nullptr, 0, 0, 0);
+    aTask->Setup(nullptr);
     return NS_ERROR_FAILURE;
   }
 
   callback->SetStreamNum(streamNum);
   // streamNum reassigns same value when last stream is finished even if
   // callback for stream end isn't called
   // So we cannot use data hashtable and has to add it to vector at last.
   mCallbacks.AppendElement(callback);
 
   return NS_OK;
 }
 
 NS_IMETHODIMP
-SapiService::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
-NS_IMETHODIMP
 SapiService::Observe(nsISupports* aSubject, const char* aTopic,
                      const char16_t* aData)
 {
   return NS_OK;
 }
 
 SapiService*
 SapiService::GetInstance()
--- a/toolkit/components/narrate/test/browser_narrate.js
+++ b/toolkit/components/narrate/test/browser_narrate.js
@@ -7,17 +7,17 @@
 /* eslint-disable mozilla/no-cpows-in-tests */
 
 registerCleanupFunction(teardown);
 
 add_task(async function testNarrate() {
   setup();
 
   await spawnInNewReaderTab(TEST_ARTICLE, async function() {
-    let TEST_VOICE = "urn:moz-tts:fake-indirect:teresa";
+    let TEST_VOICE = "urn:moz-tts:fake:teresa";
     let $ = content.document.querySelector.bind(content.document);
 
     await NarrateTestUtils.waitForNarrateToggle(content);
 
     let popup = $(NarrateTestUtils.POPUP);
     ok(!NarrateTestUtils.isVisible(popup), "popup is initially hidden");
 
     let toggle = $(NarrateTestUtils.TOGGLE);
--- a/toolkit/components/narrate/test/browser_narrate_language.js
+++ b/toolkit/components/narrate/test/browser_narrate_language.js
@@ -11,63 +11,63 @@ registerCleanupFunction(teardown);
 add_task(async function testVoiceselectDropdownAutoclose() {
   setup("automatic", true);
 
   await spawnInNewReaderTab(TEST_ARTICLE, async function() {
     let $ = content.document.querySelector.bind(content.document);
 
     await NarrateTestUtils.waitForNarrateToggle(content);
 
-    ok(!!$(".option[data-value='urn:moz-tts:fake-direct:bob']"),
+    ok(!!$(".option[data-value='urn:moz-tts:fake:bob']"),
       "Jamaican English voice available");
-    ok(!!$(".option[data-value='urn:moz-tts:fake-direct:lenny']"),
+    ok(!!$(".option[data-value='urn:moz-tts:fake:lenny']"),
       "Canadian English voice available");
-    ok(!!$(".option[data-value='urn:moz-tts:fake-direct:amy']"),
+    ok(!!$(".option[data-value='urn:moz-tts:fake:amy']"),
       "British English voice available");
 
-    ok(!$(".option[data-value='urn:moz-tts:fake-direct:celine']"),
+    ok(!$(".option[data-value='urn:moz-tts:fake:celine']"),
       "Canadian French voice unavailable");
-    ok(!$(".option[data-value='urn:moz-tts:fake-direct:julie']"),
+    ok(!$(".option[data-value='urn:moz-tts:fake:julie']"),
       "Mexican Spanish voice unavailable");
 
     $(NarrateTestUtils.TOGGLE).click();
     ok(NarrateTestUtils.isVisible($(NarrateTestUtils.POPUP)),
       "popup is toggled");
 
     let prefChanged = NarrateTestUtils.waitForPrefChange(
       "narrate.voice", "getCharPref");
-    NarrateTestUtils.selectVoice(content, "urn:moz-tts:fake-direct:lenny");
+    NarrateTestUtils.selectVoice(content, "urn:moz-tts:fake:lenny");
     let voicePref = JSON.parse(await prefChanged);
-    is(voicePref.en, "urn:moz-tts:fake-direct:lenny", "pref set correctly");
+    is(voicePref.en, "urn:moz-tts:fake:lenny", "pref set correctly");
   });
 });
 
 add_task(async function testVoiceselectDropdownAutoclose() {
   setup("automatic", true);
 
   await spawnInNewReaderTab(TEST_ITALIAN_ARTICLE, async function() {
     let $ = content.document.querySelector.bind(content.document);
 
     await NarrateTestUtils.waitForNarrateToggle(content);
 
-    ok(!!$(".option[data-value='urn:moz-tts:fake-indirect:zanetta']"),
+    ok(!!$(".option[data-value='urn:moz-tts:fake:zanetta']"),
       "Italian voice available");
-    ok(!!$(".option[data-value='urn:moz-tts:fake-indirect:margherita']"),
+    ok(!!$(".option[data-value='urn:moz-tts:fake:margherita']"),
       "Italian voice available");
 
-    ok(!$(".option[data-value='urn:moz-tts:fake-direct:bob']"),
+    ok(!$(".option[data-value='urn:moz-tts:fake:bob']"),
       "Jamaican English voice available");
-    ok(!$(".option[data-value='urn:moz-tts:fake-direct:celine']"),
+    ok(!$(".option[data-value='urn:moz-tts:fake:celine']"),
       "Canadian French voice unavailable");
-    ok(!$(".option[data-value='urn:moz-tts:fake-direct:julie']"),
+    ok(!$(".option[data-value='urn:moz-tts:fake:julie']"),
       "Mexican Spanish voice unavailable");
 
     $(NarrateTestUtils.TOGGLE).click();
     ok(NarrateTestUtils.isVisible($(NarrateTestUtils.POPUP)),
       "popup is toggled");
 
     let prefChanged = NarrateTestUtils.waitForPrefChange(
       "narrate.voice", "getCharPref");
-    NarrateTestUtils.selectVoice(content, "urn:moz-tts:fake-indirect:zanetta");
+    NarrateTestUtils.selectVoice(content, "urn:moz-tts:fake:zanetta");
     let voicePref = JSON.parse(await prefChanged);
-    is(voicePref.it, "urn:moz-tts:fake-indirect:zanetta", "pref set correctly");
+    is(voicePref.it, "urn:moz-tts:fake:zanetta", "pref set correctly");
   });
 });
--- a/toolkit/components/narrate/test/browser_voiceselect.js
+++ b/toolkit/components/narrate/test/browser_voiceselect.js
@@ -50,17 +50,17 @@ add_task(async function testVoiceselectL
     let $ = content.document.querySelector.bind(content.document);
 
     await NarrateTestUtils.waitForNarrateToggle(content);
 
     $(NarrateTestUtils.TOGGLE).click();
     ok(NarrateTestUtils.isVisible($(NarrateTestUtils.POPUP)),
       "popup is toggled");
 
-    ok(NarrateTestUtils.selectVoice(content, "urn:moz-tts:fake-direct:lenny"),
+    ok(NarrateTestUtils.selectVoice(content, "urn:moz-tts:fake:lenny"),
       "voice selected");
 
     let selectedOption = $(NarrateTestUtils.VOICE_SELECTED);
     let selectLabel = $(NarrateTestUtils.VOICE_SELECT_LABEL);
 
     is(selectedOption.textContent, selectLabel.textContent,
       "new label matches selected voice");
   });
--- a/toolkit/components/narrate/test/browser_word_highlight.js
+++ b/toolkit/components/narrate/test/browser_word_highlight.js
@@ -4,17 +4,17 @@
 
 "use strict";
 
 /* eslint-disable mozilla/no-cpows-in-tests */
 
 registerCleanupFunction(teardown);
 
 add_task(async function testNarrate() {
-  setup("urn:moz-tts:fake-indirect:teresa");
+  setup("urn:moz-tts:fake:teresa");
 
   await spawnInNewReaderTab(TEST_ARTICLE, async function() {
     let $ = content.document.querySelector.bind(content.document);
 
     await NarrateTestUtils.waitForNarrateToggle(content);
 
     let popup = $(NarrateTestUtils.POPUP);
     ok(!NarrateTestUtils.isVisible(popup), "popup is initially hidden");
@@ -24,17 +24,17 @@ add_task(async function testNarrate() {
 
     ok(NarrateTestUtils.isVisible(popup), "popup toggled");
 
     NarrateTestUtils.isStoppedState(content, ok);
 
     let promiseEvent = ContentTaskUtils.waitForEvent(content, "paragraphstart");
     $(NarrateTestUtils.START).click();
     let voice = (await promiseEvent).detail.voice;
-    is(voice, "urn:moz-tts:fake-indirect:teresa", "double-check voice");
+    is(voice, "urn:moz-tts:fake:teresa", "double-check voice");
 
     // Skip forward to first paragraph.
     let details;
     do {
       promiseEvent = ContentTaskUtils.waitForEvent(content, "paragraphstart");
       $(NarrateTestUtils.FORWARD).click();
       details = (await promiseEvent).detail;
     } while (details.tag != "p");