Bug 1262753: P4. Add resampling capabilities to AudioConverter. r?kinetik draft
author: Jean-Yves Avenard <jyavenard@mozilla.com>
Mon, 11 Apr 2016 21:07:11 +1000
changeset: 352408 59b7feafe4c24789952a434c2b54336a8b0f97e4
parent: 352407 490d83b77e1f5e7fc0da89835b315ac921ae4f29
child: 352409 114f5055a774cd6d4b5d9572b9739b965be8a012
push id: 15703
push user: bmo:jyavenard@mozilla.com
push date: Mon, 18 Apr 2016 07:44:49 +0000
reviewers: kinetik
bugs: 1262753
milestone: 48.0a1
Bug 1262753: P4. Add resampling capabilities to AudioConverter. r?kinetik MozReview-Commit-ID: 93PNC5ooEPg
dom/media/AudioConverter.cpp
dom/media/AudioConverter.h
dom/media/platforms/agnostic/VorbisDecoder.cpp
dom/media/platforms/apple/AppleATDecoder.cpp
--- a/dom/media/AudioConverter.cpp
+++ b/dom/media/AudioConverter.cpp
@@ -1,60 +1,92 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioConverter.h"
 #include <string.h>
+#include <speex/speex_resampler.h>
 
 /*
  *  Parts derived from MythTV AudioConvert Class
  *  Created by Jean-Yves Avenard.
  *
  *  Copyright (C) Bubblestuff Pty Ltd 2013
  *  Copyright (C) foobum@gmail.com 2010
  */
 
 namespace mozilla {
 
 AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
   : mIn(aIn)
   , mOut(aOut)
+  , mResampler(nullptr)
 {
-  MOZ_DIAGNOSTIC_ASSERT(aIn.Rate() == aOut.Rate() &&
-                        aIn.Format() == aOut.Format() &&
+  MOZ_DIAGNOSTIC_ASSERT(aIn.Format() == aOut.Format() &&
                         aIn.Interleaved() == aOut.Interleaved(),
                         "No format or rate conversion is supported at this stage");
   MOZ_DIAGNOSTIC_ASSERT((aIn.Channels() > aOut.Channels() && aOut.Channels() <= 2) ||
                         aIn.Channels() == aOut.Channels(),
                         "Only downmixing to mono or stereo is supported at this stage");
   MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported");
   mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap);
+  if (aIn.Rate() != aOut.Rate()) {
+    int error;
+    mResampler = speex_resampler_init(aOut.Channels(),
+                                      aIn.Rate(),
+                                      aOut.Rate(),
+                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
+                                      &error);
+
+    if (error == RESAMPLER_ERR_SUCCESS) {
+      speex_resampler_skip_zeros(mResampler);
+    } else {
+      NS_WARNING("Failed to initialize resampler.");
+      mResampler = nullptr;
+    }
+  }
+}
+
+AudioConverter::~AudioConverter()
+{
+  if (mResampler) {
+    speex_resampler_destroy(mResampler);
+    mResampler = nullptr;
+  }
 }
 
 bool
 AudioConverter::CanWorkInPlace() const
 {
-  return mIn.Channels() * mIn.Rate() * AudioConfig::SampleSize(mIn.Format()) >=
-    mOut.Channels() * mOut.Rate() * AudioConfig::SampleSize(mOut.Format());
+  bool needDownmix = mIn.Channels() > mOut.Channels();
+  bool canDownmixInPlace =
+    mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >=
+    mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
+  bool needResample = mIn.Rate() != mOut.Rate();
+  bool canResampleInPlace = mIn.Rate() >= mOut.Rate();
+  // Working in place requires that 1s of input audio takes at least as much
+  // space as 1s of output audio. Moreover, since downmixing happens before
+  // resampling, upsampling (incoming rate < outgoing rate) can't be in place.
+  return (!needDownmix || canDownmixInPlace) &&
+         (!needResample || canResampleInPlace);
 }
 
 size_t
-AudioConverter::Process(void* aOut, const void* aIn, size_t aBytes)
+AudioConverter::ProcessInternal(void* aOut, const void* aIn, size_t aBytes)
 {
-  if (!CanWorkInPlace()) {
-    return 0;
-  }
   if (mIn.Channels() > mOut.Channels()) {
     return DownmixAudio(aOut, aIn, aBytes);
   } else if (mIn.Layout() != mOut.Layout() &&
       CanReorderAudio()) {
     ReOrderInterleavedChannels(aOut, aIn, aBytes);
+  } else if (aIn != aOut) {
+    memmove(aOut, aIn, aBytes);
   }
   return aBytes;
 }
 
 // Reorder interleaved channels.
 // Can work in place (e.g aOut == aIn).
 template <class AudioDataType>
 void
@@ -218,12 +250,46 @@ AudioConverter::DownmixAudio(void* aOut,
         // The sample of the buffer would be interleaved.
         sample = (in[fIdx*channels] + in[fIdx*channels + 1]) * 0.5;
         *out++ = sample;
       }
     } else {
       MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
     }
   }
-  return frames * AudioConfig::SampleSize(mOut.Format()) * mOut.Channels();
+  return (size_t)frames * AudioConfig::SampleSize(mOut.Format()) * mOut.Channels();
 }
 
-} // namespace mozilla
\ No newline at end of file
+size_t
+AudioConverter::ResampleAudio(void* aOut, const void* aIn, size_t aDataSize)
+{
+  if (!mResampler) {
+    return 0;
+  }
+  uint32_t frames =
+    aDataSize / AudioConfig::SampleSize(mOut.Format()) / mOut.Channels();
+  uint32_t outframes = ResampleRecipientFrames(frames);
+  uint32_t inframes = frames;
+
+  if (mOut.Format() == AudioConfig::FORMAT_FLT) {
+    const float* in = reinterpret_cast<const float*>(aIn);
+    float* out = reinterpret_cast<float*>(aOut);
+    speex_resampler_process_interleaved_float(mResampler, in, &inframes,
+                                              out, &outframes);
+  } else if (mOut.Format() == AudioConfig::FORMAT_S16) {
+    const int16_t* in = reinterpret_cast<const int16_t*>(aIn);
+    int16_t* out = reinterpret_cast<int16_t*>(aOut);
+    speex_resampler_process_interleaved_int(mResampler, in, &inframes,
+                                            out, &outframes);
+  } else {
+    MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
+  }
+  MOZ_ASSERT(inframes == frames, "Some frames will be dropped");
+  return (size_t)outframes * AudioConfig::SampleSize(mOut.Format()) * mOut.Channels();
+}
+
+size_t
+AudioConverter::ResampleRecipientFrames(size_t aFrames) const
+{
+  return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+}
+
+} // namespace mozilla
--- a/dom/media/AudioConverter.h
+++ b/dom/media/AudioConverter.h
@@ -4,16 +4,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #if !defined(AudioConverter_h)
 #define AudioConverter_h
 
 #include "MediaInfo.h"
 
+// Forward declaration
+typedef struct SpeexResamplerState_ SpeexResamplerState;
+
 namespace mozilla {
 
 template <AudioConfig::SampleFormat T> struct AudioDataBufferTypeChooser;
 template <> struct AudioDataBufferTypeChooser<AudioConfig::FORMAT_U8>
 { typedef uint8_t Type; };
 template <> struct AudioDataBufferTypeChooser<AudioConfig::FORMAT_S16>
 { typedef int16_t Type; };
 template <> struct AudioDataBufferTypeChooser<AudioConfig::FORMAT_S24LSB>
@@ -110,55 +113,114 @@ private:
   AlignedBuffer<Value> mBuffer;
 };
 
 typedef AudioDataBuffer<AudioConfig::FORMAT_DEFAULT> AudioSampleBuffer;
 
 class AudioConverter {
 public:
   AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut);
+  ~AudioConverter();
+
+  // Convert the AudioDataBuffer, taking ownership of aBuffer's storage.
+  // The conversion is done in place when CanWorkInPlace() allows it; otherwise
+  // the const-reference overload of Process() builds and returns a new buffer.
+  template <AudioConfig::SampleFormat Format, typename Value>
+  AudioDataBuffer<Format, Value> Process(AudioDataBuffer<Format, Value>&& aBuffer)
+  {
+    MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
+    AudioDataBuffer<Format, Value> buffer = Move(aBuffer);
+    if (CanWorkInPlace()) {
+      size_t bytes = ProcessInternal(buffer.Data(), buffer.Data(), buffer.Size());
+      if (bytes && mIn.Rate() != mOut.Rate()) {
+        bytes = ResampleAudio(buffer.Data(), buffer.Data(), bytes);
+      }
+      AlignedBuffer<Value> temp = buffer.Forget();
+      temp.SetLength(bytes / AudioConfig::SampleSize(mOut.Format()));
+      return AudioDataBuffer<Format, Value>(Move(temp));
+    }
+    return Process(buffer);
+  }
+
+  // Out-of-place variant: aBuffer is only read, the result is a new buffer.
+  template <AudioConfig::SampleFormat Format, typename Value>
+  AudioDataBuffer<Format, Value> Process(const AudioDataBuffer<Format, Value>& aBuffer)
+  {
+    MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Format);
+    // Perform the downmixing / reordering in a temporary buffer.
+    uint32_t frames = aBuffer.Length() / mIn.Channels();
+    AlignedBuffer<Value> temp1;
+    if (!temp1.SetLength(frames * mOut.Channels())) {
+      return AudioDataBuffer<Format, Value>(Move(temp1));
+    }
+    size_t bytes = ProcessInternal(temp1.Data(), aBuffer.Data(), aBuffer.Size());
+    if (!bytes || mIn.Rate() == mOut.Rate()) {
+      temp1.SetLength(bytes / AudioConfig::SampleSize(mOut.Format()));
+      return AudioDataBuffer<Format, Value>(Move(temp1));
+    }
+    // temp1 now holds the reordered / downmixed audio; downsampling re-uses it.
+    AlignedBuffer<Value>* outputBuffer = &temp1;
+    AlignedBuffer<Value> temp2;
+    if (mOut.Rate() > mIn.Rate()) {
+      // Can't upsample in place: use a second buffer, bail out if SetLength fails.
+      if (!temp2.SetLength(ResampleRecipientFrames(frames) * mOut.Channels())) {
+        return AudioDataBuffer<Format, Value>(Move(temp2));
+      }
+      outputBuffer = &temp2;
+    }
+    bytes = ResampleAudio(outputBuffer->Data(), temp1.Data(), bytes);
+    outputBuffer->SetLength(bytes / AudioConfig::SampleSize(mOut.Format()));
+    return AudioDataBuffer<Format, Value>(Move(*outputBuffer));
+  }
 
   // Attempt to convert the AudioDataBuffer in place.
   // Will return 0 if the conversion wasn't possible.
-  // Process may allocate memory internally should intermediary steps be
-  // required.
-  template <AudioConfig::SampleFormat Type, typename Value>
-  size_t Process(AudioDataBuffer<Type, Value>& aBuffer)
-  {
-    MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format() && mIn.Format() == Type);
-    return Process(aBuffer.Data(), aBuffer.Data(), aBuffer.Size());
-  }
   template <typename Value>
   size_t Process(Value* aBuffer, size_t aSamples)
   {
     MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == mOut.Format());
-    return Process(aBuffer, aBuffer, aSamples * AudioConfig::SampleSize(mIn.Format()));
+    if (!CanWorkInPlace()) {
+      return 0;
+    }
+    size_t bytes =
+      ProcessInternal(aBuffer, aBuffer,
+                      aSamples * AudioConfig::SampleSize(mIn.Format()));
+    if (bytes && mIn.Rate() != mOut.Rate()) {
+      bytes = ResampleAudio(aBuffer, aBuffer, bytes);
+    }
+    return bytes;
   }
+
   bool CanWorkInPlace() const;
   bool CanReorderAudio() const
   {
     return mIn.Layout().MappingTable(mOut.Layout());
   }
 
   const AudioConfig& InputConfig() const { return mIn; }
   const AudioConfig& OutputConfig() const { return mOut; }
 
 private:
   const AudioConfig mIn;
   const AudioConfig mOut;
   uint8_t mChannelOrderMap[MAX_AUDIO_CHANNELS];
   /**
-   * Process
+   * ProcessInternal
    * Parameters:
    * aOut  : destination buffer where converted samples will be copied
    * aIn   : source buffer
    * aBytes: size in bytes of source buffer
    *
    * Return Value: size in bytes of samples converted or 0 if error
    */
-  size_t Process(void* aOut, const void* aIn, size_t aBytes);
+  size_t ProcessInternal(void* aOut, const void* aIn, size_t aBytes);
   void ReOrderInterleavedChannels(void* aOut, const void* aIn, size_t aDataSize) const;
   size_t DownmixAudio(void* aOut, const void* aIn, size_t aDataSize) const;
+
+  // Resampler context.
+  SpeexResamplerState* mResampler;
+  size_t ResampleAudio(void* aOut, const void* aIn, size_t aDataSize);
+  size_t ResampleRecipientFrames(size_t aFrames) const;
 };
 
 } // namespace mozilla
 
 #endif /* AudioConverter_h */
--- a/dom/media/platforms/agnostic/VorbisDecoder.cpp
+++ b/dom/media/platforms/agnostic/VorbisDecoder.cpp
@@ -222,17 +222,17 @@ VorbisDataDecoder::DoDecode(MediaRawData
     if (!mAudioConverter) {
       AudioConfig in(AudioConfig::ChannelLayout(channels, VorbisLayout(channels)),
                      rate);
       AudioConfig out(channels, rate);
       mAudioConverter = MakeUnique<AudioConverter>(in, out);
     }
     MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
     AudioSampleBuffer data(Move(buffer));
-    mAudioConverter->Process(data);
+    data = mAudioConverter->Process(Move(data));
 
     aTotalFrames += frames;
     mCallback->Output(new AudioData(aOffset,
                                     time.value(),
                                     duration.value(),
                                     frames,
                                     data.Forget(),
                                     channels,
--- a/dom/media/platforms/apple/AppleATDecoder.cpp
+++ b/dom/media/platforms/apple/AppleATDecoder.cpp
@@ -282,17 +282,17 @@ AppleATDecoder::DecodeSample(MediaRawDat
   }
   if (mChannelLayout && !mAudioConverter) {
     AudioConfig in(*mChannelLayout.get(), rate);
     AudioConfig out(channels, rate);
     mAudioConverter = MakeUnique<AudioConverter>(in, out);
   }
   if (mAudioConverter) {
     MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
-    mAudioConverter->Process(data);
+    data = mAudioConverter->Process(Move(data));
   }
 
   RefPtr<AudioData> audio = new AudioData(aSample->mOffset,
                                           aSample->mTime,
                                           duration.ToMicroseconds(),
                                           numFrames,
                                           data.Forget(),
                                           channels,