Bug 1274445: [webm] P1. Track separately audio track from video track. r?kinetik draft
authorJean-Yves Avenard <jyavenard@mozilla.com>
Mon, 30 May 2016 15:40:05 +1000
changeset 372849 2453298ed43598bbab10286bbebad01073041be1
parent 372484 2c7440e46d8786b2c82a1d2004e2b6d9d13f4046
child 372850 2d6e61b29fe68fcd5416bf807ca907968e7af76d
push id19607
push userbmo:jyavenard@mozilla.com
push dateMon, 30 May 2016 11:39:19 +0000
reviewerskinetik
bugs1274445, 1224973
milestone49.0a1
Bug 1274445: [webm] P1. Track separately audio track from video track. r?kinetik Bug 1224973 added a new usage of the MediaDataDemuxer where we would seek only inside the video track. As such, we can't use a common nestegg context for demuxing both the audio and the video. Instead we now use two different nestegg contexts. MozReview-Commit-ID: 4G86Na5abe2
dom/media/webm/WebMDemuxer.cpp
dom/media/webm/WebMDemuxer.h
--- a/dom/media/webm/WebMDemuxer.cpp
+++ b/dom/media/webm/WebMDemuxer.cpp
@@ -46,48 +46,49 @@ static Atomic<uint32_t> sStreamSourceID(
 
 // Functions for reading and seeking using WebMDemuxer required for
 // nestegg_io. The 'user data' passed to these functions is the
 // demuxer.
 static int webmdemux_read(void* aBuffer, size_t aLength, void* aUserData)
 {
   MOZ_ASSERT(aUserData);
   MOZ_ASSERT(aLength < UINT32_MAX);
-  WebMDemuxer* demuxer = reinterpret_cast<WebMDemuxer*>(aUserData);
+  WebMDemuxer::NestEggContext* context =
+    reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData);
   uint32_t count = aLength;
-  if (demuxer->IsMediaSource()) {
-    int64_t length = demuxer->GetEndDataOffset();
-    int64_t position = demuxer->GetResource()->Tell();
-    MOZ_ASSERT(position <= demuxer->GetResource()->GetLength());
+  if (context->IsMediaSource()) {
+    int64_t length = context->GetEndDataOffset();
+    int64_t position = context->GetResource()->Tell();
+    MOZ_ASSERT(position <= context->GetResource()->GetLength());
     MOZ_ASSERT(position <= length);
     if (length >= 0 && count + position > length) {
       count = length - position;
     }
     MOZ_ASSERT(count <= aLength);
   }
   uint32_t bytes = 0;
   nsresult rv =
-    demuxer->GetResource()->Read(static_cast<char*>(aBuffer), count, &bytes);
+    context->GetResource()->Read(static_cast<char*>(aBuffer), count, &bytes);
   bool eof = bytes < aLength;
   return NS_FAILED(rv) ? -1 : eof ? 0 : 1;
 }
 
 static int webmdemux_seek(int64_t aOffset, int aWhence, void* aUserData)
 {
   MOZ_ASSERT(aUserData);
-  WebMDemuxer* demuxer = reinterpret_cast<WebMDemuxer*>(aUserData);
-  nsresult rv = demuxer->GetResource()->Seek(aWhence, aOffset);
+  WebMDemuxer::NestEggContext* context = reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData);
+  nsresult rv = context->GetResource()->Seek(aWhence, aOffset);
   return NS_SUCCEEDED(rv) ? 0 : -1;
 }
 
 static int64_t webmdemux_tell(void* aUserData)
 {
   MOZ_ASSERT(aUserData);
-  WebMDemuxer* demuxer = reinterpret_cast<WebMDemuxer*>(aUserData);
-  return demuxer->GetResource()->Tell();
+  WebMDemuxer::NestEggContext* context = reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData);
+  return context->GetResource()->Tell();
 }
 
 static void webmdemux_log(nestegg* aContext,
                           unsigned int aSeverity,
                           char const* aFormat, ...)
 {
   if (!MOZ_LOG_TEST(gNesteggLog, LogLevel::Debug)) {
     return;
@@ -122,44 +123,66 @@ static void webmdemux_log(nestegg* aCont
 
   snprintf_literal(msg, "%p [Nestegg-%s] ", aContext, sevStr);
   PR_vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), aFormat, args);
   MOZ_LOG(gNesteggLog, LogLevel::Debug, (msg));
 
   va_end(args);
 }
 
+WebMDemuxer::NestEggContext::~NestEggContext()
+{
+  if (mContext) {
+    nestegg_destroy(mContext);
+  }
+}
+
+int
+WebMDemuxer::NestEggContext::Init()
+{
+  nestegg_io io;
+  io.read = webmdemux_read;
+  io.seek = webmdemux_seek;
+  io.tell = webmdemux_tell;
+  io.userdata = this;
+
+  // While reading the metadata, we do not really care about which nestegg
+  // context is being used, so long as both are initialised; ReadMetadata()
+  // itself always uses mVideoContext.
+  return nestegg_init(&mContext, io, &webmdemux_log,
+                      mParent->IsMediaSource() ? mResource.GetLength() : -1);
+}
 
 WebMDemuxer::WebMDemuxer(MediaResource* aResource)
   : WebMDemuxer(aResource, false)
 {
 }
 
 WebMDemuxer::WebMDemuxer(MediaResource* aResource, bool aIsMediaSource)
-  : mResource(aResource)
+  : mVideoContext(this, aResource)
+  , mAudioContext(this, aResource)
   , mBufferedState(nullptr)
   , mInitData(nullptr)
-  , mContext(nullptr)
   , mVideoTrack(0)
   , mAudioTrack(0)
   , mSeekPreroll(0)
   , mAudioCodec(-1)
   , mVideoCodec(-1)
   , mHasVideo(false)
   , mHasAudio(false)
   , mNeedReIndex(true)
   , mLastWebMBlockOffset(-1)
   , mIsMediaSource(aIsMediaSource)
 {
 }
 
 WebMDemuxer::~WebMDemuxer()
 {
-  Reset();
-  Cleanup();
+  Reset(TrackInfo::kVideoTrack);
+  Reset(TrackInfo::kAudioTrack);
 }
 
 RefPtr<WebMDemuxer::InitPromise>
 WebMDemuxer::Init()
 {
   InitBufferedState();
 
   if (NS_FAILED(ReadMetadata())) {
@@ -223,83 +246,78 @@ WebMDemuxer::GetTrackDemuxer(TrackInfo::
   RefPtr<WebMTrackDemuxer> e =
     new WebMTrackDemuxer(this, aType, aTrackNumber);
   mDemuxers.AppendElement(e);
 
   return e.forget();
 }
 
 nsresult
-WebMDemuxer::Reset()
+WebMDemuxer::Reset(TrackInfo::TrackType aType)
 {
-  mVideoPackets.Reset();
-  mAudioPackets.Reset();
+  if (aType == TrackInfo::kVideoTrack) {
+    mVideoPackets.Reset();
+  } else {
+    mAudioPackets.Reset();
+  }
   return NS_OK;
 }
 
-void
-WebMDemuxer::Cleanup()
-{
-  if (mContext) {
-    nestegg_destroy(mContext);
-    mContext = nullptr;
-  }
-  mBufferedState = nullptr;
-}
-
 nsresult
 WebMDemuxer::ReadMetadata()
 {
-  nestegg_io io;
-  io.read = webmdemux_read;
-  io.seek = webmdemux_seek;
-  io.tell = webmdemux_tell;
-  io.userdata = this;
-  int r = nestegg_init(&mContext, io, &webmdemux_log,
-                       IsMediaSource() ? mResource.GetLength() : -1);
+  int r = mVideoContext.Init();
   if (r == -1) {
     return NS_ERROR_FAILURE;
   }
+  if (mAudioContext.Init() == -1) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // For reading the metadata we can only use the video resource/context.
+  MediaResourceIndex& resource = Resource(TrackInfo::kVideoTrack);
+  nestegg* context = Context(TrackInfo::kVideoTrack);
+
   {
     // Check how much data nestegg read and force feed it to BufferedState.
-    RefPtr<MediaByteBuffer> buffer = mResource.MediaReadAt(0, mResource.Tell());
+    RefPtr<MediaByteBuffer> buffer = resource.MediaReadAt(0, resource.Tell());
     if (!buffer) {
       return NS_ERROR_FAILURE;
     }
     mBufferedState->NotifyDataArrived(buffer->Elements(), buffer->Length(), 0);
     if (mBufferedState->GetInitEndOffset() < 0) {
       return NS_ERROR_FAILURE;
     }
-    MOZ_ASSERT(mBufferedState->GetInitEndOffset() <= mResource.Tell());
+    MOZ_ASSERT(mBufferedState->GetInitEndOffset() <= resource.Tell());
   }
-  mInitData = mResource.MediaReadAt(0, mBufferedState->GetInitEndOffset());
+  mInitData = resource.MediaReadAt(0, mBufferedState->GetInitEndOffset());
   if (!mInitData ||
       mInitData->Length() != size_t(mBufferedState->GetInitEndOffset())) {
     return NS_ERROR_FAILURE;
   }
 
   unsigned int ntracks = 0;
-  r = nestegg_track_count(mContext, &ntracks);
+  r = nestegg_track_count(context, &ntracks);
   if (r == -1) {
     return NS_ERROR_FAILURE;
   }
 
   for (unsigned int track = 0; track < ntracks; ++track) {
-    int id = nestegg_track_codec_id(mContext, track);
+    int id = nestegg_track_codec_id(context, track);
     if (id == -1) {
       return NS_ERROR_FAILURE;
     }
-    int type = nestegg_track_type(mContext, track);
+    int type = nestegg_track_type(context, track);
     if (type == NESTEGG_TRACK_VIDEO && !mHasVideo) {
       nestegg_video_params params;
-      r = nestegg_track_video_params(mContext, track, &params);
+      r = nestegg_track_video_params(context, track, &params);
       if (r == -1) {
         return NS_ERROR_FAILURE;
       }
-      mVideoCodec = nestegg_track_codec_id(mContext, track);
+      mVideoCodec = nestegg_track_codec_id(context, track);
       switch(mVideoCodec) {
         case NESTEGG_CODEC_VP8:
           mInfo.mVideo.mMimeType = "video/webm; codecs=vp8";
           break;
         case NESTEGG_CODEC_VP9:
           mInfo.mVideo.mMimeType = "video/webm; codecs=vp9";
           break;
         default:
@@ -355,55 +373,55 @@ WebMDemuxer::ReadMetadata()
         case NESTEGG_VIDEO_STEREO_TOP_BOTTOM:
           mInfo.mVideo.mStereoMode = StereoMode::TOP_BOTTOM;
           break;
         case NESTEGG_VIDEO_STEREO_RIGHT_LEFT:
           mInfo.mVideo.mStereoMode = StereoMode::RIGHT_LEFT;
           break;
       }
       uint64_t duration = 0;
-      r = nestegg_duration(mContext, &duration);
+      r = nestegg_duration(context, &duration);
       if (!r) {
         mInfo.mVideo.mDuration = media::TimeUnit::FromNanoseconds(duration).ToMicroseconds();
       }
     } else if (type == NESTEGG_TRACK_AUDIO && !mHasAudio) {
       nestegg_audio_params params;
-      r = nestegg_track_audio_params(mContext, track, &params);
+      r = nestegg_track_audio_params(context, track, &params);
       if (r == -1) {
         return NS_ERROR_FAILURE;
       }
 
       mAudioTrack = track;
       mHasAudio = true;
       mCodecDelay = media::TimeUnit::FromNanoseconds(params.codec_delay).ToMicroseconds();
-      mAudioCodec = nestegg_track_codec_id(mContext, track);
+      mAudioCodec = nestegg_track_codec_id(context, track);
       if (mAudioCodec == NESTEGG_CODEC_VORBIS) {
         mInfo.mAudio.mMimeType = "audio/webm; codecs=vorbis";
       } else if (mAudioCodec == NESTEGG_CODEC_OPUS) {
         mInfo.mAudio.mMimeType = "audio/webm; codecs=opus";
         uint8_t c[sizeof(uint64_t)];
         BigEndian::writeUint64(&c[0], mCodecDelay);
         mInfo.mAudio.mCodecSpecificConfig->AppendElements(&c[0], sizeof(uint64_t));
       }
       mSeekPreroll = params.seek_preroll;
       mInfo.mAudio.mRate = params.rate;
       mInfo.mAudio.mChannels = params.channels;
 
       unsigned int nheaders = 0;
-      r = nestegg_track_codec_data_count(mContext, track, &nheaders);
+      r = nestegg_track_codec_data_count(context, track, &nheaders);
       if (r == -1) {
         return NS_ERROR_FAILURE;
       }
 
       AutoTArray<const unsigned char*,4> headers;
       AutoTArray<size_t,4> headerLens;
       for (uint32_t header = 0; header < nheaders; ++header) {
         unsigned char* data = 0;
         size_t length = 0;
-        r = nestegg_track_codec_data(mContext, track, header, &data, &length);
+        r = nestegg_track_codec_data(context, track, header, &data, &length);
         if (r == -1) {
           return NS_ERROR_FAILURE;
         }
         headers.AppendElement(data);
         headerLens.AppendElement(length);
       }
 
       // Vorbis has 3 headers, convert to Xiph extradata format to send them to
@@ -417,58 +435,61 @@ WebMDemuxer::ReadMetadata()
           return NS_ERROR_FAILURE;
         }
       }
       else {
         mInfo.mAudio.mCodecSpecificConfig->AppendElements(headers[0],
                                                           headerLens[0]);
       }
       uint64_t duration = 0;
-      r = nestegg_duration(mContext, &duration);
+      r = nestegg_duration(context, &duration);
       if (!r) {
         mInfo.mAudio.mDuration = media::TimeUnit::FromNanoseconds(duration).ToMicroseconds();
       }
     }
   }
   return NS_OK;
 }
 
 bool
 WebMDemuxer::IsSeekable() const
 {
-  return mContext && nestegg_has_cues(mContext);
+  return Context(TrackInfo::kVideoTrack) &&
+         nestegg_has_cues(Context(TrackInfo::kVideoTrack));
 }
 
 bool
 WebMDemuxer::IsSeekableOnlyInBufferedRanges() const
 {
-  return mContext && !nestegg_has_cues(mContext);
+  return Context(TrackInfo::kVideoTrack) &&
+         !nestegg_has_cues(Context(TrackInfo::kVideoTrack));
 }
 
 void
 WebMDemuxer::EnsureUpToDateIndex()
 {
   if (!mNeedReIndex || !mInitData) {
     return;
   }
-  AutoPinned<MediaResource> resource(mResource.GetResource());
+  AutoPinned<MediaResource> resource(
+    Resource(TrackInfo::kVideoTrack).GetResource());
   MediaByteRangeSet byteRanges;
   nsresult rv = resource->GetCachedRanges(byteRanges);
   if (NS_FAILED(rv) || !byteRanges.Length()) {
     return;
   }
   mBufferedState->UpdateIndex(byteRanges, resource);
 
   mNeedReIndex = false;
 
   if (!mIsMediaSource) {
     return;
   }
   mLastWebMBlockOffset = mBufferedState->GetLastBlockOffset();
-  MOZ_ASSERT(mLastWebMBlockOffset <= mResource.GetLength());
+  MOZ_ASSERT(mLastWebMBlockOffset <= resource->GetLength());
 }
 
 void
 WebMDemuxer::NotifyDataArrived()
 {
   WEBM_DEBUG("");
   mNeedReIndex = true;
 }
@@ -616,79 +637,65 @@ WebMDemuxer::GetNextPacket(TrackInfo::Tr
   return true;
 }
 
 RefPtr<NesteggPacketHolder>
 WebMDemuxer::NextPacket(TrackInfo::TrackType aType)
 {
   bool isVideo = aType == TrackInfo::kVideoTrack;
 
-  // The packet queue that packets will be pushed on if they
-  // are not the type we are interested in.
-  WebMPacketQueue& otherPackets = isVideo ? mAudioPackets : mVideoPackets;
-
-  // The packet queue for the type that we are interested in.
-  WebMPacketQueue &packets = isVideo ? mVideoPackets : mAudioPackets;
-
   // Flag to indicate that we do need to playback these types of
   // packets.
   bool hasType = isVideo ? mHasVideo : mHasAudio;
 
-  // Flag to indicate that we do need to playback the other type
-  // of track.
-  bool hasOtherType = isVideo ? mHasAudio : mHasVideo;
+  if (!hasType) {
+    return nullptr;
+  }
 
-  // Track we are interested in
-  uint32_t ourTrack = isVideo ? mVideoTrack : mAudioTrack;
-
-  // Value of other track
-  uint32_t otherTrack = isVideo ? mAudioTrack : mVideoTrack;
+  // The packet queue for the type that we are interested in.
+  WebMPacketQueue &packets = isVideo ? mVideoPackets : mAudioPackets;
 
   if (packets.GetSize() > 0) {
     return packets.PopFront();
   }
 
+  // Track we are interested in
+  uint32_t ourTrack = isVideo ? mVideoTrack : mAudioTrack;
+
   do {
-    RefPtr<NesteggPacketHolder> holder = DemuxPacket();
+    RefPtr<NesteggPacketHolder> holder = DemuxPacket(aType);
     if (!holder) {
       return nullptr;
     }
 
-    if (hasOtherType && otherTrack == holder->Track()) {
-      // Save the packet for when we want these packets
-      otherPackets.Push(holder);
-      continue;
-    }
-
-    // The packet is for the track we want to play
-    if (hasType && ourTrack == holder->Track()) {
+    if (ourTrack == holder->Track()) {
       return holder;
     }
   } while (true);
 }
 
 RefPtr<NesteggPacketHolder>
-WebMDemuxer::DemuxPacket()
+WebMDemuxer::DemuxPacket(TrackInfo::TrackType aType)
 {
   nestegg_packet* packet;
-  int r = nestegg_read_packet(mContext, &packet);
+  int r = nestegg_read_packet(Context(aType), &packet);
   if (r == 0) {
-    nestegg_read_reset(mContext);
+    nestegg_read_reset(Context(aType));
     return nullptr;
   } else if (r < 0) {
     return nullptr;
   }
 
   unsigned int track = 0;
   r = nestegg_packet_track(packet, &track);
   if (r == -1) {
     return nullptr;
   }
 
-  int64_t offset = mResource.Tell();
+  int64_t offset = Resource(aType).Tell();
   RefPtr<NesteggPacketHolder> holder = new NesteggPacketHolder();
   if (!holder->Init(packet, offset, track, false)) {
     return nullptr;
   }
 
   return holder;
 }
 
@@ -700,23 +707,24 @@ WebMDemuxer::PushAudioPacket(NesteggPack
 
 void
 WebMDemuxer::PushVideoPacket(NesteggPacketHolder* aItem)
 {
   mVideoPackets.PushFront(aItem);
 }
 
 nsresult
-WebMDemuxer::SeekInternal(const media::TimeUnit& aTarget)
+WebMDemuxer::SeekInternal(TrackInfo::TrackType aType,
+                          const media::TimeUnit& aTarget)
 {
   EnsureUpToDateIndex();
   uint32_t trackToSeek = mHasVideo ? mVideoTrack : mAudioTrack;
   uint64_t target = aTarget.ToNanoseconds();
 
-  if (NS_FAILED(Reset())) {
+  if (NS_FAILED(Reset(aType))) {
     return NS_ERROR_FAILURE;
   }
 
   if (mSeekPreroll) {
     uint64_t startTime = 0;
     if (!mBufferedState->GetStartTime(&startTime)) {
       startTime = 0;
     }
@@ -727,58 +735,62 @@ WebMDemuxer::SeekInternal(const media::T
     } else {
       target -= mSeekPreroll;
     }
     WEBM_DEBUG("SeekPreroll: %f StartTime: %f Adjusted Target: %f",
                media::TimeUnit::FromNanoseconds(mSeekPreroll).ToSeconds(),
                media::TimeUnit::FromNanoseconds(startTime).ToSeconds(),
                media::TimeUnit::FromNanoseconds(target).ToSeconds());
   }
-  int r = nestegg_track_seek(mContext, trackToSeek, target);
+  int r = nestegg_track_seek(Context(aType), trackToSeek, target);
   if (r == -1) {
     WEBM_DEBUG("track_seek for track %u to %f failed, r=%d", trackToSeek,
                media::TimeUnit::FromNanoseconds(target).ToSeconds(), r);
     // Try seeking directly based on cluster information in memory.
     int64_t offset = 0;
     bool rv = mBufferedState->GetOffsetForTime(target, &offset);
     if (!rv) {
       WEBM_DEBUG("mBufferedState->GetOffsetForTime failed too");
       return NS_ERROR_FAILURE;
     }
 
-    r = nestegg_offset_seek(mContext, offset);
+    r = nestegg_offset_seek(Context(aType), offset);
     if (r == -1) {
       WEBM_DEBUG("and nestegg_offset_seek to %" PRIu64 " failed", offset);
       return NS_ERROR_FAILURE;
     }
     WEBM_DEBUG("got offset from buffered state: %" PRIu64 "", offset);
   }
 
-  mLastAudioFrameTime.reset();
-  mLastVideoFrameTime.reset();
+  if (aType == TrackInfo::kAudioTrack) {
+    mLastAudioFrameTime.reset();
+  } else {
+    mLastVideoFrameTime.reset();
+  }
 
   return NS_OK;
 }
 
 media::TimeIntervals
 WebMDemuxer::GetBuffered()
 {
   EnsureUpToDateIndex();
-  AutoPinned<MediaResource> resource(mResource.GetResource());
+  AutoPinned<MediaResource> resource(
+    Resource(TrackInfo::kVideoTrack).GetResource());
 
   media::TimeIntervals buffered;
 
   MediaByteRangeSet ranges;
   nsresult rv = resource->GetCachedRanges(ranges);
   if (NS_FAILED(rv)) {
     return media::TimeIntervals();
   }
   uint64_t duration = 0;
   uint64_t startOffset = 0;
-  if (!nestegg_duration(mContext, &duration)) {
+  if (!nestegg_duration(Context(TrackInfo::kVideoTrack), &duration)) {
     if(mBufferedState->GetStartTime(&startOffset)) {
       duration += startOffset;
     }
     WEBM_DEBUG("Duration: %f StartTime: %f",
                media::TimeUnit::FromNanoseconds(duration).ToSeconds(),
                media::TimeUnit::FromNanoseconds(startOffset).ToSeconds());
   }
   for (uint32_t index = 0; index < ranges.Length(); index++) {
@@ -838,17 +850,17 @@ WebMTrackDemuxer::GetInfo() const
 RefPtr<WebMTrackDemuxer::SeekPromise>
 WebMTrackDemuxer::Seek(media::TimeUnit aTime)
 {
   // Seeks to aTime. Upon success, SeekPromise will be resolved with the
   // actual time seeked to. Typically the random access point time
 
   media::TimeUnit seekTime = aTime;
   mSamples.Reset();
-  mParent->SeekInternal(aTime);
+  mParent->SeekInternal(mType, aTime);
   mParent->GetNextPacket(mType, &mSamples);
   mNeedKeyframe = true;
 
   // Check what time we actually seeked to.
   if (mSamples.GetSize() > 0) {
     const RefPtr<MediaRawData>& sample = mSamples.First();
     seekTime = media::TimeUnit::FromMicroseconds(sample->mTime);
   }
@@ -959,17 +971,17 @@ WebMTrackDemuxer::SetNextKeyFrameTime()
 void
 WebMTrackDemuxer::Reset()
 {
   mSamples.Reset();
   media::TimeIntervals buffered = GetBuffered();
   mNeedKeyframe = true;
   if (buffered.Length()) {
     WEBM_DEBUG("Seek to start point: %f", buffered.Start(0).ToSeconds());
-    mParent->SeekInternal(buffered.Start(0));
+    mParent->SeekInternal(mType, buffered.Start(0));
     SetNextKeyFrameTime();
   } else {
     mNextKeyframeTime.reset();
   }
 }
 
 void
 WebMTrackDemuxer::UpdateSamples(nsTArray<RefPtr<MediaRawData>>& aSamples)
--- a/dom/media/webm/WebMDemuxer.h
+++ b/dom/media/webm/WebMDemuxer.h
@@ -102,75 +102,106 @@ public:
 
   UniquePtr<EncryptionInfo> GetCrypto() override;
 
   bool GetOffsetForTime(uint64_t aTime, int64_t* aOffset);
 
   // Demux next WebM packet and append samples to MediaRawDataQueue
   bool GetNextPacket(TrackInfo::TrackType aType, MediaRawDataQueue *aSamples);
 
-  nsresult Reset();
+  nsresult Reset(TrackInfo::TrackType aType);
 
   // Pushes a packet to the front of the audio packet queue.
   void PushAudioPacket(NesteggPacketHolder* aItem);
 
   // Pushes a packet to the front of the video packet queue.
   void PushVideoPacket(NesteggPacketHolder* aItem);
 
   // Public accessor for nestegg callbacks
-  MediaResourceIndex* GetResource()
-  {
-    return &mResource;
-  }
-
-  int64_t GetEndDataOffset() const
-  {
-    return (!mIsMediaSource || mLastWebMBlockOffset < 0)
-      ? mResource.GetLength() : mLastWebMBlockOffset;
-  }
-  int64_t IsMediaSource() const
+  bool IsMediaSource() const
   {
     return mIsMediaSource;
   }
 
+  int64_t LastWebMBlockOffset() const
+  {
+    return mLastWebMBlockOffset;
+  }
+
+  struct NestEggContext {
+    NestEggContext(WebMDemuxer* aParent, MediaResource* aResource)
+    : mParent(aParent)
+    , mResource(aResource)
+    , mContext(nullptr) {}
+
+    ~NestEggContext();
+
+    int Init();
+
+    // Public accessor for nestegg callbacks
+
+    bool IsMediaSource() const { return mParent->IsMediaSource(); }
+    MediaResourceIndex* GetResource() { return &mResource; }
+
+    int64_t GetEndDataOffset() const
+    {
+      return (!mParent->IsMediaSource() || mParent->LastWebMBlockOffset() < 0)
+             ? mResource.GetLength() : mParent->LastWebMBlockOffset();
+    }
+
+    WebMDemuxer* mParent;
+    MediaResourceIndex mResource;
+    nestegg* mContext;
+  };
+
 private:
   friend class WebMTrackDemuxer;
 
   ~WebMDemuxer();
-  void Cleanup();
   void InitBufferedState();
   nsresult ReadMetadata();
   void NotifyDataArrived() override;
   void NotifyDataRemoved() override;
   void EnsureUpToDateIndex();
   media::TimeIntervals GetBuffered();
-  nsresult SeekInternal(const media::TimeUnit& aTarget);
+  nsresult SeekInternal(TrackInfo::TrackType aType,
+                        const media::TimeUnit& aTarget);
 
   // Read a packet from the nestegg file. Returns nullptr if all packets for
   // the particular track have been read. Pass TrackInfo::kVideoTrack or
  // TrackInfo::kAudioTrack to indicate the type of the packet we want to read.
   RefPtr<NesteggPacketHolder> NextPacket(TrackInfo::TrackType aType);
 
   // Internal method that demuxes the next packet from the stream. The caller
   // is responsible for making sure it doesn't get lost.
-  RefPtr<NesteggPacketHolder> DemuxPacket();
+  RefPtr<NesteggPacketHolder> DemuxPacket(TrackInfo::TrackType aType);
 
-  MediaResourceIndex mResource;
+  // libnestegg audio and video context for webm container.
+  // Access on reader's thread only.
+  NestEggContext mVideoContext;
+  NestEggContext mAudioContext;
+  MediaResourceIndex& Resource(TrackInfo::TrackType aType)
+  {
+    return aType == TrackInfo::kVideoTrack
+           ? mVideoContext.mResource : mAudioContext.mResource;
+  }
+  nestegg* Context(TrackInfo::TrackType aType) const
+  {
+    return aType == TrackInfo::kVideoTrack
+           ? mVideoContext.mContext : mAudioContext.mContext;
+  }
+
   MediaInfo mInfo;
   nsTArray<RefPtr<WebMTrackDemuxer>> mDemuxers;
 
   // Parser state and computed offset-time mappings.  Shared by multiple
   // readers when decoder has been cloned.  Main thread only.
   RefPtr<WebMBufferedState> mBufferedState;
   RefPtr<MediaByteBuffer> mInitData;
 
-  // libnestegg context for webm container.
-  // Access on reader's thread for main demuxer,
-  // or main thread for cloned demuxer
-  nestegg* mContext;
 
   // Queue of video and audio packets that have been read but not decoded.
   WebMPacketQueue mVideoPackets;
   WebMPacketQueue mAudioPackets;
 
   // Index of video and audio track to play
   uint32_t mVideoTrack;
   uint32_t mAudioTrack;