Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r?smaug,arthuredelstein draft
authorTim Huang <tihuang@mozilla.com>
Thu, 20 Jul 2017 16:07:32 +0800
changeset 614926 e53fe57cadf54d3ecbe59e2c1eb4af8594d6fab5
parent 611714 eb1d92b2b6a4161492561250f51bae5bafeda68a
child 614927 8fc2596fd64e573040357c389a671a3620e02542
push id70179
push userbmo:tihuang@mozilla.com
push dateTue, 25 Jul 2017 06:56:38 +0000
reviewerssmaug, arthuredelstein
bugs1333641
milestone56.0a1
Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r?smaug,arthuredelstein The patch will change the behavior of the speechSynthesis API when fingerprinting resistance is enabled. First, speechSynthesis.getVoices() will always report an empty list and the speechSynthesis.onvoiceschanged event will be blocked. Second, speechSynthesis.speak() will fail immediately when asked to speak an utterance. By doing so, websites can no longer fingerprint users through the speechSynthesis API. In addition, this only affects content, so chrome can still use this API even when fingerprinting resistance is enabled. MozReview-Commit-ID: KxJX8fo30WS
dom/media/webspeech/synth/SpeechSynthesis.cpp
dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
dom/media/webspeech/synth/nsSpeechTask.cpp
dom/media/webspeech/synth/nsSpeechTask.h
dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
--- a/dom/media/webspeech/synth/SpeechSynthesis.cpp
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -9,18 +9,20 @@
 #include "mozilla/Logging.h"
 #include "mozilla/SizePrintfMacros.h"
 
 #include "mozilla/dom/ContentChild.h"
 #include "mozilla/dom/Element.h"
 
 #include "mozilla/dom/SpeechSynthesisBinding.h"
 #include "SpeechSynthesis.h"
+#include "nsContentUtils.h"
 #include "nsSynthVoiceRegistry.h"
 #include "nsIDocument.h"
+#include "nsIDocShell.h"
 
 #undef LOG
 mozilla::LogModule*
 GetSpeechSynthLog()
 {
   static mozilla::LazyLogModule sLog("SpeechSynthesis");
 
   return sLog;
@@ -246,16 +248,23 @@ SpeechSynthesis::OnEnd(const nsSpeechTas
   AdvanceQueue();
 }
 
 void
 SpeechSynthesis::GetVoices(nsTArray< RefPtr<SpeechSynthesisVoice> >& aResult)
 {
   aResult.Clear();
   uint32_t voiceCount = 0;
+  nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+  nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
+
+
+  if (nsContentUtils::ShouldResistFingerprinting(docShell)) {
+    return;
+  }
 
   nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
   if(NS_WARN_IF(NS_FAILED(rv))) {
     return;
   }
 
   nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
 
@@ -315,20 +324,25 @@ SpeechSynthesis::Observe(nsISupports* aS
 
       nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
       if (obs) {
         obs->RemoveObserver(this, "inner-window-destroyed");
       }
     }
   } else if (strcmp(aTopic, "synth-voices-changed") == 0) {
     LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
-    DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
-    // If we have a pending item, and voices become available, speak it.
-    if (!mCurrentTask && !mHoldQueue && HasVoices()) {
-      AdvanceQueue();
+    nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+    nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
+
+    if (!nsContentUtils::ShouldResistFingerprinting(docShell)) {
+      DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
+      // If we have a pending item, and voices become available, speak it.
+      if (!mCurrentTask && !mHoldQueue && HasVoices()) {
+        AdvanceQueue();
+      }
     }
   }
 
   return NS_OK;
 }
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
@@ -37,13 +37,13 @@ child:
 
     async InitialVoicesAndState(RemoteVoice[] aVoices, nsString[] aDefaults,
                                 bool aIsSpeaking);
 
 parent:
     async __delete__();
 
     async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang,
-                                  float aVolume, float aRate, float aPitch);
+                                  float aVolume, float aRate, float aPitch, bool aIsChrome);
 };
 
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -64,17 +64,18 @@ SpeechSynthesisChild::RecvNotifyVoicesCh
 }
 
 PSpeechSynthesisRequestChild*
 SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText,
                                                         const nsString& aLang,
                                                         const nsString& aUri,
                                                         const float& aVolume,
                                                         const float& aRate,
-                                                        const float& aPitch)
+                                                        const float& aPitch,
+                                                        const bool& aIsChrome)
 {
   MOZ_CRASH("Caller is supposed to manually construct a request!");
 }
 
 bool
 SpeechSynthesisChild::DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor)
 {
   delete aActor;
@@ -154,18 +155,18 @@ SpeechSynthesisRequestChild::RecvOnMark(
                                         const uint32_t& aCharIndex)
 {
   mTask->DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
   return IPC_OK();
 }
 
 // SpeechTaskChild
 
-SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance)
-  : nsSpeechTask(aUtterance)
+SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
+  : nsSpeechTask(aUtterance, aIsChrome)
 {
 }
 
 NS_IMETHODIMP
 SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback,
                        uint32_t aChannels, uint32_t aRate, uint8_t argc)
 {
   MOZ_CRASH("Should never be called from child");
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -40,17 +40,18 @@ protected:
   SpeechSynthesisChild();
   virtual ~SpeechSynthesisChild();
 
   PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild(const nsString& aLang,
                                                                   const nsString& aUri,
                                                                   const nsString& aText,
                                                                   const float& aVolume,
                                                                   const float& aPitch,
-                                                                  const float& aRate) override;
+                                                                  const float& aRate,
+                                                                  const bool& aIsChrome) override;
   bool DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) override;
 };
 
 class SpeechSynthesisRequestChild : public PSpeechSynthesisRequestChild
 {
 public:
   explicit SpeechSynthesisRequestChild(SpeechTaskChild* aTask);
   virtual ~SpeechSynthesisRequestChild();
@@ -77,17 +78,17 @@ protected:
   RefPtr<SpeechTaskChild> mTask;
 };
 
 class SpeechTaskChild : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestChild;
 public:
 
-  explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance);
+  explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
 
   NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback,
                    uint32_t aChannels, uint32_t aRate, uint8_t argc) override;
 
   NS_IMETHOD SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
                        JSContext* aCx) override;
 
   NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override;
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
@@ -31,19 +31,20 @@ SpeechSynthesisParent::SendInit()
 }
 
 PSpeechSynthesisRequestParent*
 SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText,
                                                           const nsString& aLang,
                                                           const nsString& aUri,
                                                           const float& aVolume,
                                                           const float& aRate,
-                                                          const float& aPitch)
+                                                          const float& aPitch,
+                                                          const bool& aIsChrome)
 {
-  RefPtr<SpeechTaskParent> task = new SpeechTaskParent(aVolume, aText);
+  RefPtr<SpeechTaskParent> task = new SpeechTaskParent(aVolume, aText, aIsChrome);
   SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task);
   return actor;
 }
 
 bool
 SpeechSynthesisParent::DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor)
 {
   delete aActor;
@@ -52,17 +53,18 @@ SpeechSynthesisParent::DeallocPSpeechSyn
 
 mozilla::ipc::IPCResult
 SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor,
                                                               const nsString& aText,
                                                               const nsString& aLang,
                                                               const nsString& aUri,
                                                               const float& aVolume,
                                                               const float& aRate,
-                                                              const float& aPitch)
+                                                              const float& aPitch,
+                                                              const bool& aIsChrome)
 {
   MOZ_ASSERT(aActor);
   SpeechSynthesisRequestParent* actor =
     static_cast<SpeechSynthesisRequestParent*>(aActor);
   nsSynthVoiceRegistry::GetInstance()->Speak(aText, aLang, aUri, aVolume, aRate,
                                              aPitch, actor->mTask);
   return IPC_OK();
 }
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -29,28 +29,30 @@ public:
 protected:
   SpeechSynthesisParent();
   virtual ~SpeechSynthesisParent();
   PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent(const nsString& aText,
                                                                     const nsString& aLang,
                                                                     const nsString& aUri,
                                                                     const float& aVolume,
                                                                     const float& aRate,
-                                                                    const float& aPitch)
+                                                                    const float& aPitch,
+                                                                    const bool& aIsChrome)
                                                                     override;
 
   bool DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) override;
 
   mozilla::ipc::IPCResult RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor,
                                                                  const nsString& aText,
                                                                  const nsString& aLang,
                                                                  const nsString& aUri,
                                                                  const float& aVolume,
                                                                  const float& aRate,
-                                                                 const float& aPitch) override;
+                                                                 const float& aPitch,
+                                                                 const bool& aIsChrome) override;
 };
 
 class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent
 {
 public:
   explicit SpeechSynthesisRequestParent(SpeechTaskParent* aTask);
   virtual ~SpeechSynthesisRequestParent();
 
@@ -72,18 +74,18 @@ protected:
 
   mozilla::ipc::IPCResult Recv__delete__() override;
 };
 
 class SpeechTaskParent : public nsSpeechTask
 {
   friend class SpeechSynthesisRequestParent;
 public:
-  SpeechTaskParent(float aVolume, const nsAString& aUtterance)
-    : nsSpeechTask(aVolume, aUtterance) {}
+  SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome)
+    : nsSpeechTask(aVolume, aUtterance, aIsChrome) {}
 
   nsresult DispatchStartImpl(const nsAString& aUri);
 
   nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
 
   nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex);
 
   nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex);
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -115,37 +115,39 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(
   NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
 NS_INTERFACE_MAP_END
 
 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
 
-nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
+nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
   : mUtterance(aUtterance)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
+  , mIsChrome(aIsChrome)
 {
   mText = aUtterance->mText;
   mVolume = aUtterance->Volume();
 }
 
-nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
+nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome)
   : mUtterance(nullptr)
   , mVolume(aVolume)
   , mText(aText)
   , mInited(false)
   , mPrePaused(false)
   , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
+  , mIsChrome(aIsChrome)
 {
 }
 
 nsSpeechTask::~nsSpeechTask()
 {
   LOG(LogLevel::Debug, ("~nsSpeechTask"));
   if (mStream) {
     if (!mStream->IsDestroyed()) {
@@ -508,26 +510,38 @@ nsSpeechTask::DispatchResumeImpl(float a
     mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
                                              aCharIndex, nullptr, aElapsedTime,
                                              EmptyString());
   }
 
   return NS_OK;
 }
 
+void
+nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex)
+{
+  DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
 NS_IMETHODIMP
 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
 {
   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError"));
 
   if (!mIndirectAudio) {
     NS_WARNING("Can't call DispatchError() from a direct audio speech service");
     return NS_ERROR_FAILURE;
   }
 
+  return DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex)
+{
   if (!mPreCanceled) {
     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
   }
 
   return DispatchErrorImpl(aElapsedTime, aCharIndex);
 }
 
 nsresult
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -30,18 +30,18 @@ class nsSpeechTask : public nsISpeechTas
 
 public:
   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
   NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSpeechTask, nsISpeechTask)
 
   NS_DECL_NSISPEECHTASK
   NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK
 
-  explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance);
-  nsSpeechTask(float aVolume, const nsAString& aText);
+  explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
+  nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome);
 
   virtual void Pause();
 
   virtual void Resume();
 
   virtual void Cancel();
 
   virtual void ForceEnd();
@@ -54,26 +54,33 @@ public:
 
   void InitDirectAudio();
   void InitIndirectAudio();
 
   void SetChosenVoiceURI(const nsAString& aUri);
 
   virtual void SetAudioOutputVolume(float aVolume);
 
+  void ForceError(float aElapsedTime, uint32_t aCharIndex);
+
   bool IsPreCanceled()
   {
     return mPreCanceled;
   };
 
   bool IsPrePaused()
   {
     return mPrePaused;
   }
 
+  bool IsChrome()
+  {
+    return mIsChrome;
+  }
+
 protected:
   virtual ~nsSpeechTask();
 
   nsresult DispatchStartImpl();
 
   virtual nsresult DispatchStartImpl(const nsAString& aUri);
 
   virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
@@ -107,16 +114,17 @@ protected:
 
 private:
   void End();
 
   void SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
 
   nsresult DispatchStartInner();
 
+  nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex);
   nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
 
   void CreateAudioChannelAgent();
 
   void DestroyAudioChannelAgent();
 
   RefPtr<SourceMediaStream> mStream;
 
@@ -128,14 +136,16 @@ private:
 
   uint32_t mChannels;
 
   RefPtr<SpeechSynthesis> mSpeechSynthesis;
 
   bool mIndirectAudio;
 
   nsString mChosenVoiceURI;
+
+  bool mIsChrome;
 };
 
 } // namespace dom
 } // namespace mozilla
 
 #endif
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -674,30 +674,36 @@ nsSynthVoiceRegistry::SpeakUtterance(Spe
       // TODO : use audio channel agent, open new bug to fix it.
       uint32_t channel = static_cast<uint32_t>(AudioChannelService::GetDefaultAudioChannel());
       AudioPlaybackConfig config = service->GetMediaConfig(topWindow->GetOuterWindow(),
                                                            channel);
       volume = config.mMuted ? 0.0f : config.mVolume * volume;
     }
   }
 
+  nsCOMPtr<nsPIDOMWindowInner> window = aUtterance.GetOwner();
+  nsCOMPtr<nsIDocument> doc = window ? window->GetDoc() : nullptr;
+
+  bool isChrome = nsContentUtils::IsChromeDoc(doc);
+
   RefPtr<nsSpeechTask> task;
   if (XRE_IsContentProcess()) {
-    task = new SpeechTaskChild(&aUtterance);
+    task = new SpeechTaskChild(&aUtterance, isChrome);
     SpeechSynthesisRequestChild* actor =
       new SpeechSynthesisRequestChild(static_cast<SpeechTaskChild*>(task.get()));
     mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor(actor,
                                                               aUtterance.mText,
                                                               lang,
                                                               uri,
                                                               volume,
                                                               aUtterance.Rate(),
-                                                              aUtterance.Pitch());
+                                                              aUtterance.Pitch(),
+                                                              isChrome);
   } else {
-    task = new nsSpeechTask(&aUtterance);
+    task = new nsSpeechTask(&aUtterance, isChrome);
     Speak(aUtterance.mText, lang, uri,
           volume, aUtterance.Rate(), aUtterance.Pitch(), task);
   }
 
   return task.forget();
 }
 
 void
@@ -706,21 +712,26 @@ nsSynthVoiceRegistry::Speak(const nsAStr
                             const nsAString& aUri,
                             const float& aVolume,
                             const float& aRate,
                             const float& aPitch,
                             nsSpeechTask* aTask)
 {
   MOZ_ASSERT(XRE_IsParentProcess());
 
+  if (!aTask->IsChrome() && nsContentUtils::ShouldResistFingerprinting()) {
+    aTask->ForceError(0, 0);
+    return;
+  }
+
   VoiceData* voice = FindBestMatch(aUri, aLang);
 
   if (!voice) {
     NS_WARNING("No voices found.");
-    aTask->DispatchError(0, 0);
+    aTask->ForceError(0, 0);
     return;
   }
 
   aTask->SetChosenVoiceURI(voice->mUri);
 
   if (mUseGlobalQueue || MediaPrefs::WebSpeechForceGlobal()) {
     LOG(LogLevel::Debug,
         ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' rate=%f pitch=%f",