Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r?smaug,arthuredelstein
The patch will change the behavior of speechSynthesis API when fingerprinting
resistance is enabled. First, the speechSynthesis.getVoices() will always report
an empty list and the speechSynthesis.onvoiceschanged event will be blocked.
And speechSynthesis.speak() will fail immediately when asked to speak an
utterance. By doing so, websites can no longer fingerprint users through the
speechSynthesis API.
In addition, this only affects content, so chrome can still use this API even
when fingerprinting resistance is enabled.
MozReview-Commit-ID: KxJX8fo30WS
--- a/dom/media/webspeech/synth/SpeechSynthesis.cpp
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -9,18 +9,20 @@
#include "mozilla/Logging.h"
#include "mozilla/SizePrintfMacros.h"
#include "mozilla/dom/ContentChild.h"
#include "mozilla/dom/Element.h"
#include "mozilla/dom/SpeechSynthesisBinding.h"
#include "SpeechSynthesis.h"
+#include "nsContentUtils.h"
#include "nsSynthVoiceRegistry.h"
#include "nsIDocument.h"
+#include "nsIDocShell.h"
#undef LOG
mozilla::LogModule*
GetSpeechSynthLog()
{
static mozilla::LazyLogModule sLog("SpeechSynthesis");
return sLog;
@@ -246,16 +248,23 @@ SpeechSynthesis::OnEnd(const nsSpeechTas
AdvanceQueue();
}
void
SpeechSynthesis::GetVoices(nsTArray< RefPtr<SpeechSynthesisVoice> >& aResult)
{
aResult.Clear();
uint32_t voiceCount = 0;
+ nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+ nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
+
+
+ if (nsContentUtils::ShouldResistFingerprinting(docShell)) {
+ return;
+ }
nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
if(NS_WARN_IF(NS_FAILED(rv))) {
return;
}
nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
@@ -315,20 +324,25 @@ SpeechSynthesis::Observe(nsISupports* aS
nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
if (obs) {
obs->RemoveObserver(this, "inner-window-destroyed");
}
}
} else if (strcmp(aTopic, "synth-voices-changed") == 0) {
LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
- DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
- // If we have a pending item, and voices become available, speak it.
- if (!mCurrentTask && !mHoldQueue && HasVoices()) {
- AdvanceQueue();
+ nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
+ nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
+
+ if (!nsContentUtils::ShouldResistFingerprinting(docShell)) {
+ DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged"));
+ // If we have a pending item, and voices become available, speak it.
+ if (!mCurrentTask && !mHoldQueue && HasVoices()) {
+ AdvanceQueue();
+ }
}
}
return NS_OK;
}
} // namespace dom
} // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
@@ -37,13 +37,13 @@ child:
async InitialVoicesAndState(RemoteVoice[] aVoices, nsString[] aDefaults,
bool aIsSpeaking);
parent:
async __delete__();
async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang,
- float aVolume, float aRate, float aPitch);
+ float aVolume, float aRate, float aPitch, bool aIsChrome);
};
} // namespace dom
} // namespace mozilla
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -64,17 +64,18 @@ SpeechSynthesisChild::RecvNotifyVoicesCh
}
PSpeechSynthesisRequestChild*
SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText,
const nsString& aLang,
const nsString& aUri,
const float& aVolume,
const float& aRate,
- const float& aPitch)
+ const float& aPitch,
+ const bool& aIsChrome)
{
MOZ_CRASH("Caller is supposed to manually construct a request!");
}
bool
SpeechSynthesisChild::DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor)
{
delete aActor;
@@ -154,18 +155,18 @@ SpeechSynthesisRequestChild::RecvOnMark(
const uint32_t& aCharIndex)
{
mTask->DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
return IPC_OK();
}
// SpeechTaskChild
-SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance)
- : nsSpeechTask(aUtterance)
+SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
+ : nsSpeechTask(aUtterance, aIsChrome)
{
}
NS_IMETHODIMP
SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback,
uint32_t aChannels, uint32_t aRate, uint8_t argc)
{
MOZ_CRASH("Should never be called from child");
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -40,17 +40,18 @@ protected:
SpeechSynthesisChild();
virtual ~SpeechSynthesisChild();
PSpeechSynthesisRequestChild* AllocPSpeechSynthesisRequestChild(const nsString& aLang,
const nsString& aUri,
const nsString& aText,
const float& aVolume,
const float& aPitch,
- const float& aRate) override;
+ const float& aRate,
+ const bool& aIsChrome) override;
bool DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) override;
};
class SpeechSynthesisRequestChild : public PSpeechSynthesisRequestChild
{
public:
explicit SpeechSynthesisRequestChild(SpeechTaskChild* aTask);
virtual ~SpeechSynthesisRequestChild();
@@ -77,17 +78,17 @@ protected:
RefPtr<SpeechTaskChild> mTask;
};
class SpeechTaskChild : public nsSpeechTask
{
friend class SpeechSynthesisRequestChild;
public:
- explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance);
+ explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback,
uint32_t aChannels, uint32_t aRate, uint8_t argc) override;
NS_IMETHOD SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
JSContext* aCx) override;
NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override;
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
@@ -31,19 +31,20 @@ SpeechSynthesisParent::SendInit()
}
PSpeechSynthesisRequestParent*
SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText,
const nsString& aLang,
const nsString& aUri,
const float& aVolume,
const float& aRate,
- const float& aPitch)
+ const float& aPitch,
+ const bool& aIsChrome)
{
- RefPtr<SpeechTaskParent> task = new SpeechTaskParent(aVolume, aText);
+ RefPtr<SpeechTaskParent> task = new SpeechTaskParent(aVolume, aText, aIsChrome);
SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task);
return actor;
}
bool
SpeechSynthesisParent::DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor)
{
delete aActor;
@@ -52,17 +53,18 @@ SpeechSynthesisParent::DeallocPSpeechSyn
mozilla::ipc::IPCResult
SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor,
const nsString& aText,
const nsString& aLang,
const nsString& aUri,
const float& aVolume,
const float& aRate,
- const float& aPitch)
+ const float& aPitch,
+ const bool& aIsChrome)
{
MOZ_ASSERT(aActor);
SpeechSynthesisRequestParent* actor =
static_cast<SpeechSynthesisRequestParent*>(aActor);
nsSynthVoiceRegistry::GetInstance()->Speak(aText, aLang, aUri, aVolume, aRate,
aPitch, actor->mTask);
return IPC_OK();
}
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -29,28 +29,30 @@ public:
protected:
SpeechSynthesisParent();
virtual ~SpeechSynthesisParent();
PSpeechSynthesisRequestParent* AllocPSpeechSynthesisRequestParent(const nsString& aText,
const nsString& aLang,
const nsString& aUri,
const float& aVolume,
const float& aRate,
- const float& aPitch)
+ const float& aPitch,
+ const bool& aIsChrome)
override;
bool DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) override;
mozilla::ipc::IPCResult RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRequestParent* aActor,
const nsString& aText,
const nsString& aLang,
const nsString& aUri,
const float& aVolume,
const float& aRate,
- const float& aPitch) override;
+ const float& aPitch,
+ const bool& aIsChrome) override;
};
class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent
{
public:
explicit SpeechSynthesisRequestParent(SpeechTaskParent* aTask);
virtual ~SpeechSynthesisRequestParent();
@@ -72,18 +74,18 @@ protected:
mozilla::ipc::IPCResult Recv__delete__() override;
};
class SpeechTaskParent : public nsSpeechTask
{
friend class SpeechSynthesisRequestParent;
public:
- SpeechTaskParent(float aVolume, const nsAString& aUtterance)
- : nsSpeechTask(aVolume, aUtterance) {}
+ SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome)
+ : nsSpeechTask(aVolume, aUtterance, aIsChrome) {}
nsresult DispatchStartImpl(const nsAString& aUri);
nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex);
nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex);
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -115,37 +115,39 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(
NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
-nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
+nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
: mUtterance(aUtterance)
, mInited(false)
, mPrePaused(false)
, mPreCanceled(false)
, mCallback(nullptr)
, mIndirectAudio(false)
+ , mIsChrome(aIsChrome)
{
mText = aUtterance->mText;
mVolume = aUtterance->Volume();
}
-nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
+nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome)
: mUtterance(nullptr)
, mVolume(aVolume)
, mText(aText)
, mInited(false)
, mPrePaused(false)
, mPreCanceled(false)
, mCallback(nullptr)
, mIndirectAudio(false)
+ , mIsChrome(aIsChrome)
{
}
nsSpeechTask::~nsSpeechTask()
{
LOG(LogLevel::Debug, ("~nsSpeechTask"));
if (mStream) {
if (!mStream->IsDestroyed()) {
@@ -508,26 +510,38 @@ nsSpeechTask::DispatchResumeImpl(float a
mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
aCharIndex, nullptr, aElapsedTime,
EmptyString());
}
return NS_OK;
}
+void
+nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex)
+{
+ DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
NS_IMETHODIMP
nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
{
LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError"));
if (!mIndirectAudio) {
NS_WARNING("Can't call DispatchError() from a direct audio speech service");
return NS_ERROR_FAILURE;
}
+ return DispatchErrorInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex)
+{
if (!mPreCanceled) {
nsSynthVoiceRegistry::GetInstance()->SpeakNext();
}
return DispatchErrorImpl(aElapsedTime, aCharIndex);
}
nsresult
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -30,18 +30,18 @@ class nsSpeechTask : public nsISpeechTas
public:
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSpeechTask, nsISpeechTask)
NS_DECL_NSISPEECHTASK
NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK
- explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance);
- nsSpeechTask(float aVolume, const nsAString& aText);
+ explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);
+ nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome);
virtual void Pause();
virtual void Resume();
virtual void Cancel();
virtual void ForceEnd();
@@ -54,26 +54,33 @@ public:
void InitDirectAudio();
void InitIndirectAudio();
void SetChosenVoiceURI(const nsAString& aUri);
virtual void SetAudioOutputVolume(float aVolume);
+ void ForceError(float aElapsedTime, uint32_t aCharIndex);
+
bool IsPreCanceled()
{
return mPreCanceled;
};
bool IsPrePaused()
{
return mPrePaused;
}
+ bool IsChrome()
+ {
+ return mIsChrome;
+ }
+
protected:
virtual ~nsSpeechTask();
nsresult DispatchStartImpl();
virtual nsresult DispatchStartImpl(const nsAString& aUri);
virtual nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
@@ -107,16 +114,17 @@ protected:
private:
void End();
void SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
nsresult DispatchStartInner();
+ nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex);
nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
void CreateAudioChannelAgent();
void DestroyAudioChannelAgent();
RefPtr<SourceMediaStream> mStream;
@@ -128,14 +136,16 @@ private:
uint32_t mChannels;
RefPtr<SpeechSynthesis> mSpeechSynthesis;
bool mIndirectAudio;
nsString mChosenVoiceURI;
+
+ bool mIsChrome;
};
} // namespace dom
} // namespace mozilla
#endif
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -674,30 +674,36 @@ nsSynthVoiceRegistry::SpeakUtterance(Spe
// TODO : use audio channel agent, open new bug to fix it.
uint32_t channel = static_cast<uint32_t>(AudioChannelService::GetDefaultAudioChannel());
AudioPlaybackConfig config = service->GetMediaConfig(topWindow->GetOuterWindow(),
channel);
volume = config.mMuted ? 0.0f : config.mVolume * volume;
}
}
+ nsCOMPtr<nsPIDOMWindowInner> window = aUtterance.GetOwner();
+ nsCOMPtr<nsIDocument> doc = window ? window->GetDoc() : nullptr;
+
+ bool isChrome = nsContentUtils::IsChromeDoc(doc);
+
RefPtr<nsSpeechTask> task;
if (XRE_IsContentProcess()) {
- task = new SpeechTaskChild(&aUtterance);
+ task = new SpeechTaskChild(&aUtterance, isChrome);
SpeechSynthesisRequestChild* actor =
new SpeechSynthesisRequestChild(static_cast<SpeechTaskChild*>(task.get()));
mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor(actor,
aUtterance.mText,
lang,
uri,
volume,
aUtterance.Rate(),
- aUtterance.Pitch());
+ aUtterance.Pitch(),
+ isChrome);
} else {
- task = new nsSpeechTask(&aUtterance);
+ task = new nsSpeechTask(&aUtterance, isChrome);
Speak(aUtterance.mText, lang, uri,
volume, aUtterance.Rate(), aUtterance.Pitch(), task);
}
return task.forget();
}
void
@@ -706,21 +712,26 @@ nsSynthVoiceRegistry::Speak(const nsAStr
const nsAString& aUri,
const float& aVolume,
const float& aRate,
const float& aPitch,
nsSpeechTask* aTask)
{
MOZ_ASSERT(XRE_IsParentProcess());
+ if (!aTask->IsChrome() && nsContentUtils::ShouldResistFingerprinting()) {
+ aTask->ForceError(0, 0);
+ return;
+ }
+
VoiceData* voice = FindBestMatch(aUri, aLang);
if (!voice) {
NS_WARNING("No voices found.");
- aTask->DispatchError(0, 0);
+ aTask->ForceError(0, 0);
return;
}
aTask->SetChosenVoiceURI(voice->mUri);
if (mUseGlobalQueue || MediaPrefs::WebSpeechForceGlobal()) {
LOG(LogLevel::Debug,
("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' rate=%f pitch=%f",