Bug 1264199: P6. Drain resampler when changing format or reaching the end. r=kinetik
MozReview-Commit-ID: KqcB0FYxNtC
--- a/dom/media/AudioConverter.cpp
+++ b/dom/media/AudioConverter.cpp
@@ -28,29 +28,17 @@ AudioConverter::AudioConverter(const Aud
aIn.Interleaved() == aOut.Interleaved(),
"No format or rate conversion is supported at this stage");
MOZ_DIAGNOSTIC_ASSERT(aOut.Channels() <= 2 ||
aIn.Channels() == aOut.Channels(),
"Only down/upmixing to mono or stereo is supported at this stage");
MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported");
mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap);
if (aIn.Rate() != aOut.Rate()) {
- int error;
- mResampler = speex_resampler_init(aOut.Channels(),
- aIn.Rate(),
- aOut.Rate(),
- SPEEX_RESAMPLER_QUALITY_DEFAULT,
- &error);
-
- if (error == RESAMPLER_ERR_SUCCESS) {
- speex_resampler_skip_zeros(mResampler);
- } else {
- NS_WARNING("Failed to initialize resampler.");
- mResampler = nullptr;
- }
+ RecreateResampler();
}
}
AudioConverter::~AudioConverter()
{
if (mResampler) {
speex_resampler_destroy(mResampler);
mResampler = nullptr;
@@ -277,16 +265,56 @@ AudioConverter::ResampleAudio(void* aOut
out, &outframes);
} else {
MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
}
MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
return outframes;
}
+void
+AudioConverter::RecreateResampler()
+{
+ if (mResampler) {
+ speex_resampler_destroy(mResampler);
+ }
+ int error;
+ mResampler = speex_resampler_init(mOut.Channels(),
+ mIn.Rate(),
+ mOut.Rate(),
+ SPEEX_RESAMPLER_QUALITY_DEFAULT,
+ &error);
+
+ if (error == RESAMPLER_ERR_SUCCESS) {
+ speex_resampler_skip_zeros(mResampler);
+ } else {
+ NS_WARNING("Failed to initialize resampler.");
+ mResampler = nullptr;
+ }
+}
+
+size_t
+AudioConverter::DrainResampler(void* aOut)
+{
+ if (!mResampler) {
+ return 0;
+ }
+ int frames = speex_resampler_get_input_latency(mResampler);
+ AlignedByteBuffer buffer(FramesOutToSamples(frames) *
+ AudioConfig::SampleSize(mOut.Format()));
+ if (!buffer) {
+ // OOM
+ return 0;
+ }
+ frames = ResampleAudio(aOut, buffer.Data(), frames);
+  // Tear down the resampler, as that is easier than handling the leftover state.
+ RecreateResampler();
+ return frames;
+}
+
size_t
AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const
{
MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
mIn.Format() == AudioConfig::FORMAT_FLT);
MOZ_ASSERT(mIn.Channels() < mOut.Channels());
MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
@@ -322,17 +350,23 @@ AudioConverter::UpmixAudio(void* aOut, c
}
return aFrames;
}
size_t
AudioConverter::ResampleRecipientFrames(size_t aFrames) const
{
- return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+ if (!aFrames && mIn.Rate() != mOut.Rate()) {
+    // The resampler is about to be drained; account for the frames currently
+    // buffered inside it.
+ return speex_resampler_get_output_latency(mResampler);
+ } else {
+ return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+ }
}
size_t
AudioConverter::FramesOutToSamples(size_t aFrames) const
{
return aFrames * mOut.Channels();
}
--- a/dom/media/AudioConverter.h
+++ b/dom/media/AudioConverter.h
@@ -118,16 +118,18 @@ typedef AudioDataBuffer<AudioConfig::FOR
class AudioConverter {
public:
AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut);
~AudioConverter();
// Convert the AudioDataBuffer.
// Conversion will be done in place if possible. Otherwise a new buffer will
// be returned.
+  // If an empty buffer is provided while resampling is in use, the
+  // resampler will be drained.
template <AudioConfig::SampleFormat Format, typename Value>
AudioDataBuffer<Format, Value> Process(AudioDataBuffer<Format, Value>&& aBuffer)
{
MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
AudioDataBuffer<Format, Value> buffer = Move(aBuffer);
if (CanWorkInPlace()) {
size_t frames = SamplesInToFrames(buffer.Length());
frames = ProcessInternal(buffer.Data(), buffer.Data(), frames);
@@ -147,32 +149,36 @@ public:
MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
// Perform the downmixing / reordering in temporary buffer.
size_t frames = SamplesInToFrames(aBuffer.Length());
AlignedBuffer<Value> temp1;
if (!temp1.SetLength(FramesOutToSamples(frames))) {
return AudioDataBuffer<Format, Value>(Move(temp1));
}
frames = ProcessInternal(temp1.Data(), aBuffer.Data(), frames);
- if (!frames || mIn.Rate() == mOut.Rate()) {
+ if (mIn.Rate() == mOut.Rate()) {
temp1.SetLength(FramesOutToSamples(frames));
return AudioDataBuffer<Format, Value>(Move(temp1));
}
// At this point, temp1 contains the buffer reordered and downmixed.
// If we are downsampling we can re-use it.
AlignedBuffer<Value>* outputBuffer = &temp1;
AlignedBuffer<Value> temp2;
- if (mOut.Rate() > mIn.Rate()) {
- // We are upsampling, we can't work in place. Allocate another temporary
- // buffer where the upsampling will occur.
+ if (!frames || mOut.Rate() > mIn.Rate()) {
+      // We are upsampling or about to drain, so we can't work in place.
+ // Allocate another temporary buffer where the upsampling will occur.
temp2.SetLength(FramesOutToSamples(ResampleRecipientFrames(frames)));
outputBuffer = &temp2;
}
- frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+ if (!frames) {
+ frames = DrainResampler(outputBuffer->Data());
+ } else {
+ frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+ }
outputBuffer->SetLength(FramesOutToSamples(frames));
return AudioDataBuffer<Format, Value>(Move(*outputBuffer));
}
// Attempt to convert the AudioDataBuffer in place.
// Will return 0 if the conversion wasn't possible.
template <typename Value>
size_t Process(Value* aBuffer, size_t aFrames)
@@ -218,13 +224,15 @@ private:
size_t FramesOutToSamples(size_t aFrames) const;
size_t SamplesInToFrames(size_t aSamples) const;
size_t FramesOutToBytes(size_t aFrames) const;
// Resampler context.
SpeexResamplerState* mResampler;
size_t ResampleAudio(void* aOut, const void* aIn, size_t aFrames);
size_t ResampleRecipientFrames(size_t aFrames) const;
+ void RecreateResampler();
+ size_t DrainResampler(void* aOut);
};
} // namespace mozilla
#endif /* AudioConverter_h */
--- a/dom/media/mediasink/DecodedAudioDataSink.cpp
+++ b/dom/media/mediasink/DecodedAudioDataSink.cpp
@@ -357,16 +357,18 @@ DecodedAudioDataSink::NotifyAudioNeeded(
if (!mConverter ||
(data->mRate != mConverter->InputConfig().Rate() ||
data->mChannels != mConverter->InputConfig().Channels())) {
SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
mConverter? mConverter->InputConfig().Channels() : 0,
mConverter ? mConverter->InputConfig().Rate() : 0,
data->mChannels, data->mRate);
+ DrainConverter();
+
// mFramesParsed indicates the current playtime in frames at the current
// input sampling rate. Recalculate it per the new sampling rate.
if (mFramesParsed) {
// We minimize overflow.
uint32_t oldRate = mConverter->InputConfig().Rate();
uint32_t newRate = data->mRate;
CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
if (!result.isValid()) {
@@ -410,16 +412,19 @@ DecodedAudioDataSink::NotifyAudioNeeded(
missingFrames =
SaferMultDiv(missingFrames.value(), mOutputRate, data->mRate);
if (!missingFrames.isValid()) {
NS_WARNING("Int overflow in DecodedAudioDataSink");
mErrored = true;
return;
}
+      // We need to insert silence; first use drained frames, if any.
+ missingFrames -= DrainConverter(missingFrames.value());
+ // Insert silence if still needed.
if (missingFrames.value()) {
AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
if (!silenceData) {
NS_WARNING("OOM in DecodedAudioDataSink");
mErrored = true;
return;
}
RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
@@ -430,20 +435,24 @@ DecodedAudioDataSink::NotifyAudioNeeded(
mLastEndTime = data->GetEndTime();
mFramesParsed += data->mFrames;
if (mConverter->InputConfig() != mConverter->OutputConfig()) {
AlignedAudioBuffer convertedData =
mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
data = CreateAudioFromBuffer(Move(convertedData), data);
}
- PushProcessedAudio(data);
+ if (PushProcessedAudio(data)) {
+ mLastProcessedPacket = Some(data);
+ }
}
if (AudioQueue().IsFinished()) {
+ // We have reached the end of the data, drain the resampler.
+ DrainConverter();
mProcessedQueue.Finish();
}
}
uint32_t
DecodedAudioDataSink::PushProcessedAudio(AudioData* aData)
{
if (!aData || !aData->mFrames) {
@@ -474,10 +483,43 @@ DecodedAudioDataSink::CreateAudioFromBuf
duration.value(),
frames,
Move(aBuffer),
mOutputChannels,
mOutputRate);
return data.forget();
}
+uint32_t
+DecodedAudioDataSink::DrainConverter(uint32_t aMaxFrames)
+{
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+ if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
+ // nothing to drain.
+ return 0;
+ }
+
+ RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
+ mLastProcessedPacket.reset();
+
+ // To drain we simply provide an empty packet to the audio converter.
+ AlignedAudioBuffer convertedData =
+ mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();
+
+ uint32_t frames = convertedData.Length() / mOutputChannels;
+ if (!convertedData.SetLength(std::min(frames, aMaxFrames) * mOutputChannels)) {
+  // This can never happen as we were reducing the length of convertedData.
+ mErrored = true;
+ return 0;
+ }
+
+ RefPtr<AudioData> data =
+ CreateAudioFromBuffer(Move(convertedData), lastPacket);
+ if (!data) {
+ return 0;
+ }
+ mProcessedQueue.Push(data);
+ return data->mFrames;
+}
+
} // namespace media
} // namespace mozilla
--- a/dom/media/mediasink/DecodedAudioDataSink.h
+++ b/dom/media/mediasink/DecodedAudioDataSink.h
@@ -109,32 +109,36 @@ private:
Atomic<bool> mPlaybackComplete;
const RefPtr<AbstractThread> mOwnerThread;
// Audio Processing objects and methods
void OnAudioPopped(const RefPtr<MediaData>& aSample);
void OnAudioPushed(const RefPtr<MediaData>& aSample);
void NotifyAudioNeeded();
+ // Drain the converter and add the output to the processed audio queue.
+ // A maximum of aMaxFrames will be added.
+ uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
already_AddRefed<AudioData> CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
AudioData* aReference);
// Add data to the processsed queue, update mProcessedQueueLength and
// return the number of frames added.
uint32_t PushProcessedAudio(AudioData* aData);
UniquePtr<AudioConverter> mConverter;
MediaQueue<AudioData> mProcessedQueue;
// Length in microseconds of the ProcessedQueue
Atomic<int32_t> mProcessedQueueLength;
MediaEventListener mAudioQueueListener;
MediaEventListener mAudioQueueFinishListener;
MediaEventListener mProcessedQueueListener;
// Number of frames processed from AudioQueue(). Used to determine gaps in
// the input stream. It indicates the time in frames since playback started
// at the current input framerate.
int64_t mFramesParsed;
+ Maybe<RefPtr<AudioData>> mLastProcessedPacket;
int64_t mLastEndTime;
// Never modifed after construction.
uint32_t mOutputRate;
uint32_t mOutputChannels;
};
} // namespace media
} // namespace mozilla