Bug 1288091 - Update to libopus 1.1.3. r?jmspeex draft
authorRalph Giles <giles@mozilla.com>
Tue, 19 Jul 2016 18:06:20 -0400
changeset 389984 cf5f20c9c9d1f2c7995d16d5d63d8121947fd924
parent 389960 ed8e23b5e0c7b739e61173bb180cf3410a306679
child 525909 b04be164291ddf77c4b9d83169b3aef3bcaecf03
push id23574
push userbmo:giles@thaumas.net
push dateWed, 20 Jul 2016 14:00:11 +0000
reviewersjmspeex
bugs1288091
milestone50.0a1
Bug 1288091 - Update to libopus 1.1.3. r?jmspeex New upstream release. This is a minor release focusing mainly on optimizations and bug fixes. - Neon optimizations inproving performance on ARMv7 and ARMv8 by up to 15% - Fixes some issues with 16-bit platforms (e.g. TI C55x) - Fixes to comfort noise generation (CNG) - Documenting that PLC packets can also be 2 bytes - Includes experimental ambisonics work (--enable-ambisonics) MozReview-Commit-ID: IcdnCok500X
media/libopus/README_MOZILLA
media/libopus/celt/arch.h
media/libopus/celt/arm/arm_celt_map.c
media/libopus/celt/arm/armcpu.c
media/libopus/celt/arm/armcpu.h
media/libopus/celt/arm/celt_neon_intr.c
media/libopus/celt/arm/fixed_arm64.h
media/libopus/celt/arm/pitch_arm.h
media/libopus/celt/bands.c
media/libopus/celt/celt.h
media/libopus/celt/celt_decoder.c
media/libopus/celt/celt_encoder.c
media/libopus/celt/celt_lpc.c
media/libopus/celt/cwrs.c
media/libopus/celt/fixed_generic.h
media/libopus/celt/kiss_fft.c
media/libopus/celt/mathops.c
media/libopus/celt/pitch.c
media/libopus/celt/pitch.h
media/libopus/celt/rate.c
media/libopus/celt/vq.c
media/libopus/celt/x86/pitch_sse.h
media/libopus/celt/x86/x86_celt_map.c
media/libopus/celt/x86/x86cpu.c
media/libopus/include/opus.h
media/libopus/include/opus_defines.h
media/libopus/include/opus_multistream.h
media/libopus/moz.build
media/libopus/silk/CNG.c
media/libopus/silk/NLSF_del_dec_quant.c
media/libopus/silk/NLSF_encode.c
media/libopus/silk/NSQ.c
media/libopus/silk/NSQ.h
media/libopus/silk/NSQ_del_dec.c
media/libopus/silk/PLC.c
media/libopus/silk/arm/NSQ_neon.c
media/libopus/silk/arm/NSQ_neon.h
media/libopus/silk/arm/arm_silk_map.c
media/libopus/silk/arm/macros_arm64.h
media/libopus/silk/decode_core.c
media/libopus/silk/fixed/burg_modified_FIX.c
media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
media/libopus/silk/macros.h
media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
media/libopus/silk/process_NLSFs.c
media/libopus/silk/sort.c
media/libopus/silk/stereo_LR_to_MS.c
media/libopus/silk/x86/NSQ_sse.c
media/libopus/silk/x86/main_sse.h
media/libopus/sources.mozbuild
media/libopus/src/analysis.c
media/libopus/src/opus.c
media/libopus/src/opus_encoder.c
media/libopus/src/opus_multistream_encoder.c
media/libopus/src/repacketizer.c
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@@ -3,9 +3,9 @@ IETF Opus audio codec reference implemen
 The source in this directory was copied from an opus
 repository checkout by running the ./update.sh script.
 Any changes made to this version of the source should
 be reflected in that script, e.g. by applying patch
 files after the copy step.
 
 The upstream repository is https://git.xiph.org/opus.git
 
-The git tag/revision used was v1.1.2.
+The git tag/revision used was v1.1.3.
--- a/media/libopus/celt/arch.h
+++ b/media/libopus/celt/arch.h
@@ -73,16 +73,25 @@ static OPUS_INLINE void _celt_fatal(cons
 #define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
 #define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
 #define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
 #define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
 #define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */
 #define UADD32(a,b) ((a)+(b))
 #define USUB32(a,b) ((a)-(b))
 
+/* Set this if opus_int64 is a native type of the CPU. */
+/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
+   (which can be ILP32 for x32) and Win64 (which is LLP64). */
+#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
+#define OPUS_FAST_INT64 1
+#else
+#define OPUS_FAST_INT64 0
+#endif
+
 #define PRINT_MIPS(file)
 
 #ifdef FIXED_POINT
 
 typedef opus_int16 opus_val16;
 typedef opus_int32 opus_val32;
 
 typedef opus_val32 celt_sig;
@@ -113,17 +122,19 @@ static OPUS_INLINE opus_int16 SAT16(opus
 }
 
 #ifdef FIXED_DEBUG
 #include "fixed_debug.h"
 #else
 
 #include "fixed_generic.h"
 
-#ifdef OPUS_ARM_INLINE_EDSP
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/fixed_arm64.h"
+#elif OPUS_ARM_INLINE_EDSP
 #include "arm/fixed_armv5e.h"
 #elif defined (OPUS_ARM_INLINE_ASM)
 #include "arm/fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #elif defined (TI_C5X_ASM)
 #include "fixed_c5x.h"
 #elif defined (TI_C6X_ASM)
--- a/media/libopus/celt/arm/arm_celt_map.c
+++ b/media/libopus/celt/arm/arm_celt_map.c
@@ -31,35 +31,57 @@
 
 #include "pitch.h"
 #include "kiss_fft.h"
 #include "mdct.h"
 
 #if defined(OPUS_HAVE_RTCD)
 
 # if defined(FIXED_POINT)
+#  if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+    (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+    (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int , int) = {
   celt_pitch_xcorr_c,               /* ARMv4 */
   MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
   MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
 };
+
+#  endif
 # else /* !FIXED_POINT */
-#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
 void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int, int) = {
   celt_pitch_xcorr_c,              /* ARMv4 */
   celt_pitch_xcorr_c,              /* EDSP */
   celt_pitch_xcorr_c,              /* Media */
   celt_pitch_xcorr_float_neon      /* Neon */
 };
 #  endif
 # endif /* FIXED_POINT */
 
+#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
+ defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* ARMv4 */
+  xcorr_kernel_c,                /* EDSP */
+  xcorr_kernel_c,                /* Media */
+  xcorr_kernel_neon_fixed,       /* Neon */
+};
+
+#endif
+
 # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 #  if defined(HAVE_ARM_NE10)
 #   if defined(CUSTOM_MODES)
 int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
    opus_fft_alloc_arch_c,        /* ARMv4 */
    opus_fft_alloc_arch_c,        /* EDSP */
    opus_fft_alloc_arch_c,        /* Media */
    opus_fft_alloc_arm_neon       /* Neon with NE10 library support */
--- a/media/libopus/celt/arm/armcpu.c
+++ b/media/libopus/celt/arm/armcpu.c
@@ -32,57 +32,60 @@
 #endif
 
 #ifdef OPUS_HAVE_RTCD
 
 #include "armcpu.h"
 #include "cpu_support.h"
 #include "os_support.h"
 #include "opus_types.h"
+#include "arch.h"
 
-#define OPUS_CPU_ARM_V4    (1)
-#define OPUS_CPU_ARM_EDSP  (1<<1)
-#define OPUS_CPU_ARM_MEDIA (1<<2)
-#define OPUS_CPU_ARM_NEON  (1<<3)
+#define OPUS_CPU_ARM_V4_FLAG    (1<<OPUS_ARCH_ARM_V4)
+#define OPUS_CPU_ARM_EDSP_FLAG  (1<<OPUS_ARCH_ARM_EDSP)
+#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
+#define OPUS_CPU_ARM_NEON_FLAG  (1<<OPUS_ARCH_ARM_NEON)
 
 #if defined(_MSC_VER)
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
 # define WIN32_LEAN_AND_MEAN
 # define WIN32_EXTRA_LEAN
 # include <windows.h>
 
 static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
   opus_uint32 flags;
   flags=0;
   /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
    * instructions via their assembled hex code.
    * All of these instructions should be essentially nops. */
-# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*PLD [r13]*/
     __emit(0xF5DDF000);
-    flags|=OPUS_CPU_ARM_EDSP;
+    flags|=OPUS_CPU_ARM_EDSP_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
-#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*SHADD8 r3,r3,r3*/
     __emit(0xE6333F93);
-    flags|=OPUS_CPU_ARM_MEDIA;
+    flags|=OPUS_CPU_ARM_MEDIA_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
 #   if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*VORR q0,q0,q0*/
     __emit(0xF2200150);
-    flags|=OPUS_CPU_ARM_NEON;
+    flags|=OPUS_CPU_ARM_NEON_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
 #   endif
 #  endif
 # endif
   return flags;
@@ -102,44 +105,44 @@ opus_uint32 opus_cpu_capabilities(void)
   if(cpuinfo != NULL)
   {
     /* 512 should be enough for anybody (it's even enough for all the flags that
      * x86 has accumulated... so far). */
     char buf[512];
 
     while(fgets(buf, 512, cpuinfo) != NULL)
     {
-# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for edsp and neon flag */
       if(memcmp(buf, "Features", 8) == 0)
       {
         char *p;
-#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
         p = strstr(buf, " edsp");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
-          flags |= OPUS_CPU_ARM_EDSP;
-#  endif
+          flags |= OPUS_CPU_ARM_EDSP_FLAG;
 
 #  if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
         p = strstr(buf, " neon");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
-          flags |= OPUS_CPU_ARM_NEON;
+          flags |= OPUS_CPU_ARM_NEON_FLAG;
 #  endif
       }
 # endif
 
-# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for media capabilities (>= ARMv6) */
       if(memcmp(buf, "CPU architecture:", 17) == 0)
       {
         int version;
         version = atoi(buf+17);
 
         if(version >= 6)
-          flags |= OPUS_CPU_ARM_MEDIA;
+          flags |= OPUS_CPU_ARM_MEDIA_FLAG;
       }
 # endif
     }
 
     fclose(cpuinfo);
   }
   return flags;
 }
@@ -151,24 +154,32 @@ opus_uint32 opus_cpu_capabilities(void)
    "your platform.  Reconfigure with --disable-rtcd (or send patches)."
 #endif
 
 int opus_select_arch(void)
 {
   opus_uint32 flags = opus_cpu_capabilities();
   int arch = 0;
 
-  if(!(flags & OPUS_CPU_ARM_EDSP))
+  if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
+    /* Asserts ensure arch values are sequential */
+    celt_assert(arch == OPUS_ARCH_ARM_V4);
     return arch;
+  }
   arch++;
 
-  if(!(flags & OPUS_CPU_ARM_MEDIA))
+  if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_EDSP);
     return arch;
+  }
   arch++;
 
-  if(!(flags & OPUS_CPU_ARM_NEON))
+  if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
     return arch;
+  }
   arch++;
 
+  celt_assert(arch == OPUS_ARCH_ARM_NEON);
   return arch;
 }
 
 #endif
--- a/media/libopus/celt/arm/armcpu.h
+++ b/media/libopus/celt/arm/armcpu.h
@@ -61,11 +61,17 @@
 # if defined(OPUS_ARM_PRESUME_NEON)
 #  define PRESUME_NEON(name) name ## _neon
 # else
 #  define PRESUME_NEON(name) PRESUME_MEDIA(name)
 # endif
 
 # if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);
+
+#define OPUS_ARCH_ARM_V4    (0)
+#define OPUS_ARCH_ARM_EDSP  (1)
+#define OPUS_ARCH_ARM_MEDIA (2)
+#define OPUS_ARCH_ARM_NEON  (3)
+
 # endif
 
 #endif
--- a/media/libopus/celt/arm/celt_neon_intr.c
+++ b/media/libopus/celt/arm/celt_neon_intr.c
@@ -32,17 +32,76 @@
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include <arm_neon.h>
 #include "../pitch.h"
 
-#if !defined(FIXED_POINT)
+#if defined(FIXED_POINT)
+void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   int32x4_t a = vld1q_s32(sum);
+   /* Load y[0...3] */
+   /* This requires len>0 to always be valid (which we assert in the C code). */
+   int16x4_t y0 = vld1_s16(y);
+   y += 4;
+
+   for (j = 0; j + 8 <= len; j += 8)
+   {
+      /* Load x[0...7] */
+      int16x8_t xx = vld1q_s16(x);
+      int16x4_t x0 = vget_low_s16(xx);
+      int16x4_t x4 = vget_high_s16(xx);
+      /* Load y[4...11] */
+      int16x8_t yy = vld1q_s16(y);
+      int16x4_t y4 = vget_low_s16(yy);
+      int16x4_t y8 = vget_high_s16(yy);
+      int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
+      int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
+
+      int16x4_t y1 = vext_s16(y0, y4, 1);
+      int16x4_t y5 = vext_s16(y4, y8, 1);
+      int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
+      int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
+
+      int16x4_t y2 = vext_s16(y0, y4, 2);
+      int16x4_t y6 = vext_s16(y4, y8, 2);
+      int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
+      int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
+
+      int16x4_t y3 = vext_s16(y0, y4, 3);
+      int16x4_t y7 = vext_s16(y4, y8, 3);
+      int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
+      int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
+
+      y0 = y8;
+      a = a7;
+      x += 8;
+      y += 8;
+   }
+
+   for (; j < len; j++)
+   {
+      int16x4_t x0 = vld1_dup_s16(x);  /* load next x */
+      int32x4_t a0 = vmlal_s16(a, y0, x0);
+
+      int16x4_t y4 = vld1_dup_s16(y);  /* load next y */
+      y0 = vext_s16(y0, y4, 1);
+      a = a0;
+      x++;
+      y++;
+   }
+
+   vst1q_s32(sum, a);
+}
+
+#else
 /*
  * Function: xcorr_kernel_neon_float
  * ---------------------------------
  * Computes 4 correlation values and stores them in sum[4]
  */
 static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
       float32_t sum[4], int len) {
    float32x4_t YY[3];
new file mode 100644
--- /dev/null
+++ b/media/libopus/celt/arm/fixed_arm64.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2015 Vidyo */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARM64_H
+#define FIXED_ARM64_H
+
+#include <arm_neon.h>
+
+#undef SIG2WORD16
+#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
+
+#endif
--- a/media/libopus/celt/arm/pitch_arm.h
+++ b/media/libopus/celt/arm/pitch_arm.h
@@ -41,28 +41,86 @@ opus_val32 celt_pitch_xcorr_neon(const o
 #   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
 #  endif
 
 #  if defined(OPUS_ARM_MAY_HAVE_EDSP)
 opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
     opus_val32 *xcorr, int len, int max_pitch);
 #  endif
 
-#  if !defined(OPUS_HAVE_RTCD)
+#  if defined(OPUS_HAVE_RTCD) && \
+    ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+     (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+     (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
+extern opus_val32
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_EDSP) || \
+    defined(OPUS_ARM_PRESUME_MEDIA) || \
+    defined(OPUS_ARM_PRESUME_NEON)
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void xcorr_kernel_neon_fixed(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#  endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+
+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+     ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+      ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
+
 #  endif
 
 #else /* Start !FIXED_POINT */
 /* Float case */
 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
                                  opus_val32 *xcorr, int len, int max_pitch);
-#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
-#define OVERRIDE_PITCH_XCORR (1)
+#endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern void
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define OVERRIDE_PITCH_XCORR (1)
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+#   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
    ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
-#endif
-#endif
+
+#  endif
 
 #endif /* end !FIXED_POINT */
+
 #endif
--- a/media/libopus/celt/bands.c
+++ b/media/libopus/celt/bands.c
@@ -409,17 +409,17 @@ static void stereo_merge(celt_norm * OPU
 #endif
    opus_val32 t, lgain, rgain;
 
    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
    dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
    /* Compensating for the mid normalization */
    xp = MULT16_32_Q15(mid, xp);
    /* mid and side are in Q15, not Q14 like X and Y */
-   mid2 = SHR32(mid, 1);
+   mid2 = SHR16(mid, 1);
    El = MULT16_16(mid2, mid2) + side - 2*xp;
    Er = MULT16_16(mid2, mid2) + side + 2*xp;
    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
    {
       OPUS_COPY(Y, X, N);
       return;
    }
 
@@ -709,17 +709,17 @@ static void compute_theta(struct band_ct
          mid and side because we know that 1) they have unit norm and
          2) they are orthogonal. */
       itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
    }
    tell = ec_tell_frac(ec);
    if (qn!=1)
    {
       if (encode)
-         itheta = (itheta*qn+8192)>>14;
+         itheta = (itheta*(opus_int32)qn+8192)>>14;
 
       /* Entropy coding of the angle. We use a uniform pdf for the
          time split, a step for stereo, and a triangular one for the rest. */
       if (stereo && N>2)
       {
          int p0 = 3;
          int x = itheta;
          int x0 = qn/2;
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@@ -204,17 +204,17 @@ void comb_filter(opus_val32 *y, opus_val
       const opus_val16 *window, int overlap, int arch);
 
 #ifdef NON_STATIC_COMB_FILTER_CONST_C
 void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
                          opus_val16 g10, opus_val16 g11, opus_val16 g12);
 #endif
 
 #ifndef OVERRIDE_COMB_FILTER_CONST
-# define comb_filter_const(y, x, T, N, g10, g11, g12, arch)		\
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
 #endif
 
 void init_caps(const CELTMode *m,int *cap,int LM,int C);
 
 #ifdef RESYNTH
 void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
 void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@@ -77,16 +77,17 @@ struct OpusCustomDecoder {
 
    /* Everything beyond this point gets cleared on a reset */
 #define DECODER_RESET_START rng
 
    opus_uint32 rng;
    int error;
    int last_pitch_index;
    int loss_count;
+   int skip_plc;
    int postfilter_period;
    int postfilter_period_old;
    opus_val16 postfilter_gain;
    opus_val16 postfilter_gain_old;
    int postfilter_tapset;
    int postfilter_tapset_old;
 
    celt_sig preemph_memD[2];
@@ -159,18 +160,16 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_dec
    st->stream_channels = st->channels = channels;
 
    st->downsample = 1;
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
    st->arch = opus_select_arch();
 
-   st->loss_count = 0;
-
    opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
 
    return OPUS_OK;
 }
 
 #ifdef CUSTOM_MODES
 void opus_custom_decoder_destroy(CELTDecoder *st)
 {
@@ -442,17 +441,17 @@ static void celt_decode_lost(CELTDecoder
    lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
    oldBandE = lpc+C*LPC_ORDER;
    oldLogE = oldBandE + 2*nbEBands;
    oldLogE2 = oldLogE + 2*nbEBands;
    backgroundLogE = oldLogE2  + 2*nbEBands;
 
    loss_count = st->loss_count;
    start = st->start;
-   noise_based = loss_count >= 5 || start != 0;
+   noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
    if (noise_based)
    {
       /* Noise-based PLC/CNG */
 #ifdef NORM_ALIASING_HACK
       celt_norm *X;
 #else
       VARDECL(celt_norm, X);
 #endif
@@ -827,16 +826,20 @@ int celt_decode_with_ec(CELTDecoder * OP
    if (data == NULL || len<=1)
    {
       celt_decode_lost(st, N, LM);
       deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
       RESTORE_STACK;
       return frame_size/st->downsample;
    }
 
+   /* Check if there are at least two packets received consecutively before
+    * turning on the pitch-based PLC */
+   st->skip_plc = st->loss_count != 0;
+
    if (dec == NULL)
    {
       ec_dec_init(&_dec,(unsigned char*)data,len);
       dec = &_dec;
    }
 
    if (C==1)
    {
@@ -1193,16 +1196,17 @@ int opus_custom_decoder_ctl(CELTDecoder 
          oldBandE = lpc+st->channels*LPC_ORDER;
          oldLogE = oldBandE + 2*st->mode->nbEBands;
          oldLogE2 = oldLogE + 2*st->mode->nbEBands;
          OPUS_CLEAR((char*)&st->DECODER_RESET_START,
                opus_custom_decoder_get_size(st->mode, st->channels)-
                ((char*)&st->DECODER_RESET_START - (char*)st));
          for (i=0;i<2*st->mode->nbEBands;i++)
             oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->skip_plc = 1;
       }
       break;
       case OPUS_GET_PITCH_REQUEST:
       {
          opus_int32 *value = va_arg(ap, opus_int32*);
          if (value==NULL)
             goto bad_arg;
          *value = st->postfilter_period;
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@@ -1170,20 +1170,20 @@ static int run_prefilter(CELTEncoder *st
 
       comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
             st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
             st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch);
       OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap);
 
       if (N>COMBFILTER_MAXPERIOD)
       {
-         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
       } else {
          OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
-         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
       }
    } while (++c<CC);
 
    RESTORE_STACK;
    *gain = gain1;
    *pitch = pitch_index;
    *qgain = qg;
    return pf_on;
@@ -1276,22 +1276,25 @@ static int compute_vbr(const CELTMode *m
       floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
       floor_depth = IMAX(floor_depth, target>>2);
       target = IMIN(target, floor_depth);
       /*printf("%f %d\n", maxDepth, floor_depth);*/
    }
 
    if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
    {
-      opus_val16 rate_factor;
+      opus_val16 rate_factor = Q15ONE;
+      if (bitrate < 64000)
+      {
 #ifdef FIXED_POINT
-      rate_factor = MAX16(0,(bitrate-32000));
+         rate_factor = MAX16(0,(bitrate-32000));
 #else
-      rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
+         rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
 #endif
+      }
       if (constrained_vbr)
          rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
       target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
 
    }
 
    if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
    {
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@@ -44,18 +44,17 @@ int          p
    opus_val32 r;
    opus_val32 error = ac[0];
 #ifdef FIXED_POINT
    opus_val32 lpc[LPC_ORDER];
 #else
    float *lpc = _lpc;
 #endif
 
-   for (i = 0; i < p; i++)
-      lpc[i] = 0;
+   OPUS_CLEAR(lpc, p);
    if (ac[0] != 0)
    {
       for (i = 0; i < p; i++) {
          /* Sum up this iteration's reflection coefficient */
          opus_val32 rr = 0;
          for (j = 0; j < i; j++)
             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
          rr += SHR32(ac[i + 1],3);
--- a/media/libopus/celt/cwrs.c
+++ b/media/libopus/celt/cwrs.c
@@ -69,17 +69,17 @@ int log2_frac(opus_uint32 val, int frac)
   /*Exact powers of two require no rounding.*/
   else return (l-1)<<frac;
 }
 #endif
 
 /*Although derived separately, the pulse vector coding scheme is equivalent to
    a Pyramid Vector Quantizer \cite{Fis86}.
   Some additional notes about an early version appear at
-   http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+   https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
    and the definitions of some terms have evolved since that was written.
 
   The conversion from a pulse vector to an integer index (encoding) and back
    (decoding) is governed by two related functions, V(N,K) and U(N,K).
 
   V(N,K) = the number of combinations, with replacement, of N items, taken K
    at a time, when a sign bit is added to each item taken at least once (i.e.,
    the number of N-dimensional unit pulse vectors with K pulses).
--- a/media/libopus/celt/fixed_generic.h
+++ b/media/libopus/celt/fixed_generic.h
@@ -32,26 +32,42 @@
 
 #ifndef FIXED_GENERIC_H
 #define FIXED_GENERIC_H
 
 /** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
 #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
 #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
 
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
 #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
 
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
+#else
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+#endif
 
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
+#else
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#endif
 
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
 
 /** Compile-time conversion of float constant to 32-bit value */
 #define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
 
 /** Negate a 16-bit value */
--- a/media/libopus/celt/kiss_fft.c
+++ b/media/libopus/celt/kiss_fft.c
@@ -186,17 +186,17 @@ static void kf_bfly3(
    size_t k;
    const size_t m2 = 2*m;
    const kiss_twiddle_cpx *tw1,*tw2;
    kiss_fft_cpx scratch[5];
    kiss_twiddle_cpx epi3;
 
    kiss_fft_cpx * Fout_beg = Fout;
 #ifdef FIXED_POINT
-   epi3.r = -16384;
+   /*epi3.r = -16384;*/ /* Unused */
    epi3.i = -28378;
 #else
    epi3 = st->twiddles[fstride*m];
 #endif
    for (i=0;i<N;i++)
    {
       Fout = Fout_beg + i*mm;
       tw1=tw2=st->twiddles;
--- a/media/libopus/celt/mathops.c
+++ b/media/libopus/celt/mathops.c
@@ -159,17 +159,17 @@ opus_val16 celt_cos_norm(opus_val32 x)
    if (x>SHL32(EXTEND32(1), 16))
       x = SUB32(SHL32(EXTEND32(1), 17),x);
    if (x&0x00007fff)
    {
       if (x<SHL32(EXTEND32(1), 15))
       {
          return _celt_cos_pi_2(EXTRACT16(x));
       } else {
-         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+         return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
       }
    } else {
       if (x&0x0000ffff)
          return 0;
       else if (x&0x0001ffff)
          return -32767;
       else
          return 32767;
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@@ -407,16 +407,51 @@ void pitch_search(const opus_val16 * OPU
    } else {
       offset = 0;
    }
    *pitch = 2*best_pitch[0]-offset;
 
    RESTORE_STACK;
 }
 
+#ifdef FIXED_POINT
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   opus_val32 x2y2;
+   int sx, sy, shift;
+   opus_val32 g;
+   opus_val16 den;
+   if (xy == 0 || xx == 0 || yy == 0)
+      return 0;
+   sx = celt_ilog2(xx)-14;
+   sy = celt_ilog2(yy)-14;
+   shift = sx + sy;
+   x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
+   if (shift & 1) {
+      if (x2y2 < 32768)
+      {
+         x2y2 <<= 1;
+         shift--;
+      } else {
+         x2y2 >>= 1;
+         shift++;
+      }
+   }
+   den = celt_rsqrt_norm(x2y2);
+   g = MULT16_32_Q15(den, xy);
+   g = VSHR32(g, (shift>>1)-1);
+   return EXTRACT16(MIN32(g, Q15ONE));
+}
+#else
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   return xy/celt_sqrt(1+xx*yy);
+}
+#endif
+
 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
 {
    int k, i, T, T0;
    opus_val16 g, g0;
    opus_val16 pg;
    opus_val32 xy,xx,yy,xy2;
@@ -445,28 +480,17 @@ opus_val16 remove_doubling(opus_val16 *x
    for (i=1;i<=maxperiod;i++)
    {
       yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
       yy_lookup[i] = MAX32(0, yy);
    }
    yy = yy_lookup[T0];
    best_xy = xy;
    best_yy = yy;
-#ifdef FIXED_POINT
-      {
-         opus_val32 x2y2;
-         int sh, t;
-         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
-         sh = celt_ilog2(x2y2)>>1;
-         t = VSHR32(x2y2, 2*(sh-7));
-         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
-      }
-#else
-      g = g0 = xy/celt_sqrt(1+xx*yy);
-#endif
+   g = g0 = compute_pitch_gain(xy, xx, yy);
    /* Look for any pitch at T/k */
    for (k=2;k<=15;k++)
    {
       int T1, T1b;
       opus_val16 g1;
       opus_val16 cont=0;
       opus_val16 thresh;
       T1 = celt_udiv(2*T0+k, 2*k);
@@ -479,34 +503,23 @@ opus_val16 remove_doubling(opus_val16 *x
             T1b = T0;
          else
             T1b = T0+T1;
       } else
       {
          T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
       }
       dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
-      xy += xy2;
-      yy = yy_lookup[T1] + yy_lookup[T1b];
-#ifdef FIXED_POINT
-      {
-         opus_val32 x2y2;
-         int sh, t;
-         x2y2 = 1+MULT32_32_Q31(xx,yy);
-         sh = celt_ilog2(x2y2)>>1;
-         t = VSHR32(x2y2, 2*(sh-7));
-         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
-      }
-#else
-      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
-#endif
+      xy = HALF32(xy + xy2);
+      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
+      g1 = compute_pitch_gain(xy, xx, yy);
       if (abs(T1-prev_period)<=1)
          cont = prev_gain;
       else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
-         cont = HALF32(prev_gain);
+         cont = HALF16(prev_gain);
       else
          cont = 0;
       thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
       /* Bias against very high pitch (very short period) to avoid false-positives
          due to short-term correlation */
       if (T1<3*minperiod)
          thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
       else if (T1<2*minperiod)
--- a/media/libopus/celt/pitch.h
+++ b/media/libopus/celt/pitch.h
@@ -182,39 +182,19 @@ void comb_filter_const_c(opus_val32 *y, 
 opus_val32
 #else
 void
 #endif
 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
       opus_val32 *xcorr, int len, int max_pitch);
 
 #if !defined(OVERRIDE_PITCH_XCORR)
-/*Is run-time CPU detection enabled on this platform?*/
-# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
-   || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
-   && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
-extern
-#  if defined(FIXED_POINT)
-opus_val32
-#  else
-void
-#  endif
-(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-      const opus_val16 *, opus_val32 *, int, int);
-
-#  define OVERRIDE_PITCH_XCORR
-#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
-  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
-        xcorr, len, max_pitch))
-# else
-
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
 celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
       opus_val32 *xcorr, int len, int max_pitch, int arch);
 
-# endif
 #endif
 
 #endif
--- a/media/libopus/celt/rate.c
+++ b/media/libopus/celt/rate.c
@@ -291,17 +291,17 @@ static OPUS_INLINE int interp_bits2pulse
       else
          lo = mid;
    }
    psum = 0;
    /*printf ("interp bisection gave %d\n", lo);*/
    done = 0;
    for (j=end;j-->start;)
    {
-      int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
+      int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
       if (tmp < thresh[j] && !done)
       {
          if (tmp >= alloc_floor)
             tmp = alloc_floor;
          else
             tmp = 0;
       } else
          done = 1;
--- a/media/libopus/celt/vq.c
+++ b/media/libopus/celt/vq.c
@@ -266,17 +266,17 @@ unsigned alg_quant(celt_norm *X, int N, 
       int rshift;
 #endif
 #ifdef FIXED_POINT
       rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
 #endif
       best_id = 0;
       /* The squared magnitude term gets added anyway, so we might as well
          add it outside the loop */
-      yy = ADD32(yy, 1);
+      yy = ADD16(yy, 1);
       j=0;
       do {
          opus_val16 Rxy, Ryy;
          /* Temporary sums of the new pulse(s) */
          Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
          /* We're multiplying y[j] by two so we don't have to do it here */
          Ryy = ADD16(yy, y[j]);
 
--- a/media/libopus/celt/x86/pitch_sse.h
+++ b/media/libopus/celt/x86/pitch_sse.h
@@ -97,31 +97,31 @@ opus_val32 celt_inner_prod_sse(
     const opus_val16 *y,
     int               N);
 #endif
 
 
 #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse4_1(x, y, N))
+    ((void)arch, celt_inner_prod_sse4_1(x, y, N))
 
 #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse2(x, y, N))
+    ((void)arch, celt_inner_prod_sse2(x, y, N))
 
 #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse(x, y, N))
+    ((void)arch, celt_inner_prod_sse(x, y, N))
 
 
 #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
-	(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+    (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
 
 extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
                     const opus_val16 *x,
                     const opus_val16 *y,
                     int               N);
 
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
@@ -133,29 +133,29 @@ extern opus_val32 (*const CELT_INNER_PRO
 
 #define OVERRIDE_DUAL_INNER_PROD
 #define OVERRIDE_COMB_FILTER_CONST
 
 #undef dual_inner_prod
 #undef comb_filter_const
 
 void dual_inner_prod_sse(const opus_val16 *x,
-	const opus_val16 *y01,
-	const opus_val16 *y02,
-	int               N,
-	opus_val32       *xy1,
-	opus_val32       *xy2);
+    const opus_val16 *y01,
+    const opus_val16 *y02,
+    int               N,
+    opus_val32       *xy1,
+    opus_val32       *xy2);
 
 void comb_filter_const_sse(opus_val32 *y,
-	opus_val32 *x,
-	int         T,
-	int         N,
-	opus_val16  g10,
-	opus_val16  g11,
-	opus_val16  g12);
+    opus_val32 *x,
+    int         T,
+    int         N,
+    opus_val16  g10,
+    opus_val16  g11,
+    opus_val16  g12);
 
 
 #if defined(OPUS_X86_PRESUME_SSE)
 # define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
     ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
 
 # define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
@@ -164,29 +164,29 @@ void comb_filter_const_sse(opus_val32 *y
 extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
               const opus_val16 *x,
               const opus_val16 *y01,
               const opus_val16 *y02,
               int               N,
               opus_val32       *xy1,
               opus_val32       *xy2);
 
-#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch)			\
+#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
     ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
 
 extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
               opus_val32 *y,
               opus_val32 *x,
               int         T,
               int         N,
               opus_val16  g10,
               opus_val16  g11,
               opus_val16  g12);
 
-#define comb_filter_const(y, x, T, N, g10, g11, g12, arch)				\
+#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
 
 #define NON_STATIC_COMB_FILTER_CONST_C
 
 #endif
 #endif
 
 #endif
--- a/media/libopus/celt/x86/x86_celt_map.c
+++ b/media/libopus/celt/x86/x86_celt_map.c
@@ -67,17 +67,17 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCH
   xcorr_kernel_c,
   MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1  */
   MAY_HAVE_SSE4_1(xcorr_kernel)  /* avx  */
 };
 
 #endif
 
 #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
-	(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
+ (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
 
 opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
          const opus_val16 *x,
          const opus_val16 *y,
          int              N
 ) = {
   celt_inner_prod_c,                /* non-sse */
   celt_inner_prod_c,
--- a/media/libopus/celt/x86/x86cpu.c
+++ b/media/libopus/celt/x86/x86cpu.c
@@ -41,17 +41,17 @@
   (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
 
 
 #if defined(_MSC_VER)
 
 #include <intrin.h>
 static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
 {
-	__cpuid((int*)CPUInfo, InfoType);
+    __cpuid((int*)CPUInfo, InfoType);
 }
 
 #else
 
 #if defined(CPU_INFO_BY_C)
 #include <cpuid.h>
 #endif
 
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@@ -137,17 +137,17 @@ extern "C" {
   * <li>frame_size is the duration of the frame in samples (per channel)</li>
   * <li>packet is the byte array to which the compressed data is written</li>
   * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended).
   *     Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li>
   * </ul>
   *
   * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
   * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
-  * is 1 byte, then the packet does not need to be transmitted (DTX).
+  * is 2 bytes or less, then the packet does not need to be transmitted (DTX).
   *
   * Once the encoder state if no longer needed, it can be destroyed with
   *
   * @code
   * opus_encoder_destroy(enc);
   * @endcode
   *
   * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(),
--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@@ -60,17 +60,17 @@ extern "C" {
 #define OPUS_ALLOC_FAIL       -7
 /**@}*/
 
 /** @cond OPUS_INTERNAL_DOC */
 /**Export control for opus functions */
 
 #ifndef OPUS_EXPORT
 # if defined(WIN32)
-#  ifdef OPUS_BUILD
+#  if defined(OPUS_BUILD) && defined(DLL_EXPORT)
 #   define OPUS_EXPORT __declspec(dllexport)
 #  else
 #   define OPUS_EXPORT
 #  endif
 # elif defined(__GNUC__) && defined(OPUS_BUILD)
 #  define OPUS_EXPORT __attribute__ ((visibility ("default")))
 # else
 #  define OPUS_EXPORT
--- a/media/libopus/include/opus_multistream.h
+++ b/media/libopus/include/opus_multistream.h
@@ -105,20 +105,20 @@ extern "C" {
   *
   * The multistream API allows individual Opus streams to be combined into a
   * single packet, enabling support for up to 255 channels. Unlike an
   * elementary Opus stream, the encoder and decoder must negotiate the channel
   * configuration before the decoder can successfully interpret the data in the
   * packets produced by the encoder. Some basic information, such as packet
   * duration, can be computed without any special negotiation.
   *
-  * The format for multistream Opus packets is defined in the
-  * <a href="https://tools.ietf.org/html/draft-ietf-codec-oggopus">Ogg
-  * encapsulation specification</a> and is based on the self-delimited Opus
-  * framing described in Appendix B of <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
+  * The format for multistream Opus packets is defined in
+  * <a href="https://tools.ietf.org/html/rfc7845">RFC 7845</a>
+  * and is based on the self-delimited Opus framing described in Appendix B of
+  * <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
   * Normal Opus packets are just a degenerate case of multistream Opus packets,
   * and can be encoded or decoded with the multistream API by setting
   * <code>streams</code> to <code>1</code> when initializing the encoder or
   * decoder.
   *
   * Multistream Opus streams can contain up to 255 elementary Opus streams.
   * These may be either "uncoupled" or "coupled", indicating that the decoder
   * is configured to decode them to either 1 or 2 channels, respectively.
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -15,17 +15,17 @@ EXPORTS.opus += [
 ]
 
 # We allow warnings for third-party code that can be updated from upstream.
 ALLOW_COMPILER_WARNINGS = True
 
 FINAL_LIBRARY = 'gkmedias'
 
 DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.1.2-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.1.3-mozilla"'
 DEFINES['USE_ALLOCA'] = True
 
 # Don't export symbols
 DEFINES['OPUS_EXPORT'] = ''
 
 if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['GNU_AS']:
     DEFINES['OPUS_ARM_ASM'] = True
     DEFINES['OPUS_ARM_EXTERNAL_ASM'] = True
--- a/media/libopus/silk/CNG.c
+++ b/media/libopus/silk/CNG.c
@@ -29,19 +29,18 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
-    opus_int32                       exc_Q10[],          /* O    CNG excitation signal Q10                   */
+    opus_int32                       exc_Q14[],          /* O    CNG excitation signal Q10                   */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
-    opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
     opus_int32                       *rand_seed          /* I/O  Seed to random index generator              */
 )
 {
     opus_int32 seed;
     opus_int   i, idx, exc_mask;
 
     exc_mask = CNG_BUF_MASK_MAX;
@@ -50,17 +49,17 @@ static OPUS_INLINE void silk_CNG_exc(
     }
 
     seed = *rand_seed;
     for( i = 0; i < length; i++ ) {
         seed = silk_RAND( seed );
         idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
         silk_assert( idx >= 0 );
         silk_assert( idx <= CNG_BUF_MASK_MAX );
-        exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+        exc_Q14[ i ] = exc_buf_Q14[ idx ];
     }
     *rand_seed = seed;
 }
 
 void silk_CNG_Reset(
     silk_decoder_state          *psDec                          /* I/O  Decoder state                               */
 )
 {
@@ -80,17 +79,17 @@ void silk_CNG_Reset(
 void silk_CNG(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
     opus_int16                  frame[],                        /* I/O  Signal                                      */
     opus_int                    length                          /* I    Length of residual                          */
 )
 {
     opus_int   i, subfr;
-    opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
+    opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10;
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
     silk_CNG_struct *psCNG = &psDec->sCNG;
     SAVE_STACK;
 
     if( psDec->fs_kHz != psCNG->fs_kHz ) {
         /* Reset state */
         silk_CNG_Reset( psDec );
 
@@ -119,63 +118,67 @@ void silk_CNG(
         /* Smooth gains */
         for( i = 0; i < psDec->nb_subfr; i++ ) {
             psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
         }
     }
 
     /* Add CNG when packet is lost or during DTX */
     if( psDec->lossCnt ) {
-        VARDECL( opus_int32, CNG_sig_Q10 );
-        ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
+        VARDECL( opus_int32, CNG_sig_Q14 );
+        ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 );
 
         /* Generate CNG excitation */
         gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
         if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
             gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
             gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
             gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
         } else {
             gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
             gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
             gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
         }
-        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
+        gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
+        
+        silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
 
         /* Generate CNG signal, by synthesis filtering */
-        silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
         for( i = 0; i < length; i++ ) {
             silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-            sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+            LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
             if( psDec->LPC_order == 16 ) {
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
             }
 
             /* Update states */
-            CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
-
-            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
+            CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+            
+            /* Scale with Gain and add to input signal */
+            frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
+            
         }
-        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
     } else {
         silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order *  sizeof( opus_int32 ) );
     }
     RESTORE_STACK;
 }
--- a/media/libopus/silk/NLSF_del_dec_quant.c
+++ b/media/libopus/silk/NLSF_del_dec_quant.c
@@ -41,18 +41,19 @@ opus_int32 silk_NLSF_del_dec_quant(     
     const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
     const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
     const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
     const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
     const opus_int16            order                           /* I    Number of input values                      */
 )
 {
     opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
-    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
-    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+    opus_int         pred_Q10, diff_Q10, rate0_Q5, rate1_Q5;
+    opus_int16       out0_Q10, out1_Q10;
+    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25;
     opus_int         ind_sort[         NLSF_QUANT_DEL_DEC_STATES ];
     opus_int8        ind[              NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
     opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
     opus_int32       RD_Q25[       2 * NLSF_QUANT_DEL_DEC_STATES ];
     opus_int32       RD_min_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
     opus_int32       RD_max_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
     const opus_uint8 *rates_Q5;
 
@@ -69,33 +70,32 @@ opus_int32 silk_NLSF_del_dec_quant(     
         } else if( i == 0 ) {
             out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         } else if( i == -1 ) {
             out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         } else {
             out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
             out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         }
-        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
-        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 );
+        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 );
     }
 
     silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
 
     nStates = 1;
     RD_Q25[ 0 ] = 0;
     prev_out_Q10[ 0 ] = 0;
     for( i = order - 1; ; i-- ) {
         rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
-        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
         in_Q10 = x_Q10[ i ];
         for( j = 0; j < nStates; j++ ) {
-            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+            pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 );
             res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
-            ind_tmp  = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+            ind_tmp  = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 );
             ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
             ind[ j ][ i ] = (opus_int8)ind_tmp;
 
             /* compute outputs for ind_tmp and ind_tmp + 1 */
             out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
             out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
 
             out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
--- a/media/libopus/silk/NLSF_encode.c
+++ b/media/libopus/silk/NLSF_encode.c
@@ -41,17 +41,17 @@ opus_int32 silk_NLSF_encode(            
     const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
     const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
     const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
     const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
     const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
 )
 {
     opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;
-    opus_int32       W_tmp_Q9;
+    opus_int32       W_tmp_Q9, ret;
     VARDECL( opus_int32, err_Q26 );
     VARDECL( opus_int32, RD_Q25 );
     VARDECL( opus_int, tempIndices1 );
     VARDECL( opus_int8, tempIndices2 );
     opus_int16       res_Q15[      MAX_LPC_ORDER ];
     opus_int16       res_Q10[      MAX_LPC_ORDER ];
     opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];
     opus_int16       W_tmp_QW[     MAX_LPC_ORDER ];
@@ -126,11 +126,12 @@ opus_int32 silk_NLSF_encode(            
     silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 );
 
     NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ];
     silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) );
 
     /* Decode */
     silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
 
+    ret = RD_Q25[ 0 ];
     RESTORE_STACK;
-    return RD_Q25[ 0 ];
+    return ret;
 }
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@@ -26,16 +26,18 @@ POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
+#include "NSQ.h"
+
 
 static OPUS_INLINE void silk_nsq_scale_states(
     const silk_encoder_state *psEncC,           /* I    Encoder State                   */
     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
     const opus_int32    x_Q3[],                 /* I    input in Q3                     */
     opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
     const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
     opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
@@ -61,17 +63,18 @@ static OPUS_INLINE void silk_noise_shape
     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
     opus_int32          LF_shp_Q14,             /* I                                    */
     opus_int32          Gain_Q16,               /* I                                    */
     opus_int            Lambda_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 );
 #endif
 
 void silk_NSQ_c
 (
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
@@ -150,17 +153,17 @@ void silk_NSQ_c
                 NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
             }
         }
 
         silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
 
         silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
             AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
-            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
 
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pxq    += psEncC->subfr_length;
     }
 
     /* Update lagPrev for next frame */
     NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
@@ -193,59 +196,47 @@ void silk_noise_shape_quantizer(
     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
     opus_int32          LF_shp_Q14,             /* I                                    */
     opus_int32          Gain_Q16,               /* I                                    */
     opus_int            Lambda_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 )
 {
-    opus_int     i, j;
+    opus_int     i;
     opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
     opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
     opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+#ifdef silk_short_prediction_create_arch_coef
+    opus_int32   a_Q12_arch[MAX_LPC_ORDER];
+#endif
 
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
 
     /* Set up short term AR state */
     psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
 
+#ifdef silk_short_prediction_create_arch_coef
+    silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
+#endif
+
     for( i = 0; i < length; i++ ) {
         /* Generate dither */
         NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
 
         /* Short-term prediction */
-        silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
-        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-        LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[  0 ], a_Q12[ 0 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
-        if( predictLPCOrder == 16 ) {
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
-        }
+        LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
 
         /* Long-term prediction */
         if( signalType == TYPE_VOICED ) {
             /* Unrolled loop */
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
             LTP_pred_Q13 = 2;
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
@@ -254,33 +245,18 @@ void silk_noise_shape_quantizer(
             LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
             pred_lag_ptr++;
         } else {
             LTP_pred_Q13 = 0;
         }
 
         /* Noise shape feedback */
         silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
-        tmp2 = psLPC_Q14[ 0 ];
-        tmp1 = NSQ->sAR2_Q14[ 0 ];
-        NSQ->sAR2_Q14[ 0 ] = tmp2;
-        n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
-        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
-        for( j = 2; j < shapingLPCOrder; j += 2 ) {
-            tmp2 = NSQ->sAR2_Q14[ j - 1 ];
-            NSQ->sAR2_Q14[ j - 1 ] = tmp1;
-            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
-            tmp1 = NSQ->sAR2_Q14[ j + 0 ];
-            NSQ->sAR2_Q14[ j + 0 ] = tmp2;
-            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
-        }
-        NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
-        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+        n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
 
-        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
         n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
 
         n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
         n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
 
         silk_assert( lag > 0 || signalType != TYPE_VOICED );
 
         /* Combine prediction and noise shaping signals */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/NSQ.h
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2014 Vidyo.
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef SILK_NSQ_H
+#define SILK_NSQ_H
+
+#include "SigProc_FIX.h"
+
+#undef silk_short_prediction_create_arch_coef
+
+static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_c(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order)
+{
+    opus_int32 out;
+    silk_assert( order == 10 || order == 16 );
+
+    /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+    out = silk_RSHIFT( order, 1 );
+    out = silk_SMLAWB( out, buf32[  0 ], coef16[ 0 ] );
+    out = silk_SMLAWB( out, buf32[ -1 ], coef16[ 1 ] );
+    out = silk_SMLAWB( out, buf32[ -2 ], coef16[ 2 ] );
+    out = silk_SMLAWB( out, buf32[ -3 ], coef16[ 3 ] );
+    out = silk_SMLAWB( out, buf32[ -4 ], coef16[ 4 ] );
+    out = silk_SMLAWB( out, buf32[ -5 ], coef16[ 5 ] );
+    out = silk_SMLAWB( out, buf32[ -6 ], coef16[ 6 ] );
+    out = silk_SMLAWB( out, buf32[ -7 ], coef16[ 7 ] );
+    out = silk_SMLAWB( out, buf32[ -8 ], coef16[ 8 ] );
+    out = silk_SMLAWB( out, buf32[ -9 ], coef16[ 9 ] );
+
+    if( order == 16 )
+    {
+        out = silk_SMLAWB( out, buf32[ -10 ], coef16[ 10 ] );
+        out = silk_SMLAWB( out, buf32[ -11 ], coef16[ 11 ] );
+        out = silk_SMLAWB( out, buf32[ -12 ], coef16[ 12 ] );
+        out = silk_SMLAWB( out, buf32[ -13 ], coef16[ 13 ] );
+        out = silk_SMLAWB( out, buf32[ -14 ], coef16[ 14 ] );
+        out = silk_SMLAWB( out, buf32[ -15 ], coef16[ 15 ] );
+    }
+    return out;
+}
+
+#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch)  ((void)arch,silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
+
+static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_c(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
+{
+    opus_int32 out;
+    opus_int32 tmp1, tmp2;
+    opus_int j;
+
+    tmp2 = data0[0];
+    tmp1 = data1[0];
+    data1[0] = tmp2;
+
+    out = silk_RSHIFT(order, 1);
+    out = silk_SMLAWB(out, tmp2, coef[0]);
+
+    for (j = 2; j < order; j += 2) {
+        tmp2 = data1[j - 1];
+        data1[j - 1] = tmp1;
+        out = silk_SMLAWB(out, tmp1, coef[j - 1]);
+        tmp1 = data1[j + 0];
+        data1[j + 0] = tmp2;
+        out = silk_SMLAWB(out, tmp2, coef[j]);
+    }
+    data1[order - 1] = tmp1;
+    out = silk_SMLAWB(out, tmp1, coef[order - 1]);
+    /* Q11 -> Q12 */
+    out = silk_LSHIFT32( out, 1 );
+    return out;
+}
+
+#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch)  ((void)arch,silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order))
+
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#include "arm/NSQ_neon.h"
+#endif
+
+#endif /* SILK_NSQ_H */
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@@ -26,16 +26,18 @@ POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 #include "stack_alloc.h"
+#include "NSQ.h"
+
 
 typedef struct {
     opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
     opus_int32 RandState[ DECISION_DELAY ];
     opus_int32 Q_Q10[     DECISION_DELAY ];
     opus_int32 Xq_Q14[    DECISION_DELAY ];
     opus_int32 Pred_Q15[  DECISION_DELAY ];
     opus_int32 Shape_Q14[ DECISION_DELAY ];
@@ -101,17 +103,18 @@ static OPUS_INLINE void silk_noise_shape
     opus_int            offset_Q10,             /* I                                        */
     opus_int            length,                 /* I    Input length                        */
     opus_int            subfr,                  /* I    Subframe number                     */
     opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
     opus_int            predictLPCOrder,        /* I    Prediction filter order             */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 );
 
 void silk_NSQ_del_dec_c(
     const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
     silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
     SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
     const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
     opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
@@ -255,17 +258,17 @@ void silk_NSQ_del_dec_c(
         }
 
         silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
             psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
 
         silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
             delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
             Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
-            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
 
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pxq    += psEncC->subfr_length;
     }
 
     /* Find winner */
     RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
@@ -328,38 +331,47 @@ static OPUS_INLINE void silk_noise_shape
     opus_int            offset_Q10,             /* I                                        */
     opus_int            length,                 /* I    Input length                        */
     opus_int            subfr,                  /* I    Subframe number                     */
     opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
     opus_int            predictLPCOrder,        /* I    Prediction filter order             */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 )
 {
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
     opus_int32   Winner_rand_state;
     opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
     opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+#ifdef silk_short_prediction_create_arch_coef
+    opus_int32   a_Q12_arch[MAX_LPC_ORDER];
+#endif
+
     VARDECL( NSQ_sample_pair, psSampleState );
     NSQ_del_dec_struct *psDD;
     NSQ_sample_struct  *psSS;
     SAVE_STACK;
 
     silk_assert( nStatesDelayedDecision > 0 );
     ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
 
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
 
+#ifdef silk_short_prediction_create_arch_coef
+    silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
+#endif
+
     for( i = 0; i < length; i++ ) {
         /* Perform common calculations used in all states */
 
         /* Long-term prediction */
         if( signalType == TYPE_VOICED ) {
             /* Unrolled loop */
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
             LTP_pred_Q14 = 2;
@@ -393,37 +405,17 @@ static OPUS_INLINE void silk_noise_shape
             psSS = psSampleState[ k ];
 
             /* Generate dither */
             psDD->Seed = silk_RAND( psDD->Seed );
 
             /* Pointer used in short term prediction and shaping */
             psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
             /* Short-term prediction */
-            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
-            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-            LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[  0 ], a_Q12[ 0 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
-            if( predictLPCOrder == 16 ) {
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
-            }
+            LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
             LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
 
             /* Noise shape feedback */
             silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
             /* Output of lowpass section */
             tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
             /* Output of allpass section */
             tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@@ -360,17 +360,18 @@ static OPUS_INLINE void silk_PLC_conceal
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
         LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
         for( j = 10; j < psDec->LPC_order; j++ ) {
             LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] );
         }
 
         /* Add prediction to LPC excitation */
-        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ],
+                                            silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ));
 
         /* Scale with Gain */
         frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
     }
 
     /* Save LPC state */
     silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
 
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/NSQ_neon.c
@@ -0,0 +1,112 @@
+/***********************************************************************
+Copyright (C) 2014 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <arm_neon.h>
+#include "main.h"
+#include "stack_alloc.h"
+#include "NSQ.h"
+#include "celt/cpu_support.h"
+#include "celt/arm/armcpu.h"
+
+opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order)
+{
+    int32x4_t coef0 = vld1q_s32(coef32);
+    int32x4_t coef1 = vld1q_s32(coef32 + 4);
+    int32x4_t coef2 = vld1q_s32(coef32 + 8);
+    int32x4_t coef3 = vld1q_s32(coef32 + 12);
+
+    int32x4_t a0 = vld1q_s32(buf32 - 15);
+    int32x4_t a1 = vld1q_s32(buf32 - 11);
+    int32x4_t a2 = vld1q_s32(buf32 - 7);
+    int32x4_t a3 = vld1q_s32(buf32 - 3);
+
+    int32x4_t b0 = vqdmulhq_s32(coef0, a0);
+    int32x4_t b1 = vqdmulhq_s32(coef1, a1);
+    int32x4_t b2 = vqdmulhq_s32(coef2, a2);
+    int32x4_t b3 = vqdmulhq_s32(coef3, a3);
+
+    int32x4_t c0 = vaddq_s32(b0, b1);
+    int32x4_t c1 = vaddq_s32(b2, b3);
+
+    int32x4_t d = vaddq_s32(c0, c1);
+
+    int64x2_t e = vpaddlq_s32(d);
+
+    int64x1_t f = vadd_s64(vget_low_s64(e), vget_high_s64(e));
+
+    opus_int32 out = vget_lane_s32(vreinterpret_s32_s64(f), 0);
+
+    out += silk_RSHIFT( order, 1 );
+
+    return out;
+}
+
+
+opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
+{
+    opus_int32 out;
+    if (order == 8)
+    {
+        int32x4_t a00 = vdupq_n_s32(data0[0]);
+        int32x4_t a01 = vld1q_s32(data1);  /* data1[0] ... [3] */
+
+        int32x4_t a0 = vextq_s32 (a00, a01, 3); /* data0[0] data1[0] ...[2] */
+        int32x4_t a1 = vld1q_s32(data1 + 3);  /* data1[3] ... [6] */
+
+        /*TODO: Convert these once in advance instead of once per sample, like
+          silk_noise_shape_quantizer_short_prediction_neon() does.*/
+        int16x8_t coef16 = vld1q_s16(coef);
+        int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16));
+        int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16));
+
+        /*This is not bit-exact with the C version, since we do not drop the
+          lower 16 bits of each multiply, but wait until the end to truncate
+          precision. This is an encoder-specific calculation (and unlike
+          silk_noise_shape_quantizer_short_prediction_neon(), is not meant to
+          simulate what the decoder will do). We still could use vqdmulhq_s32()
+          like silk_noise_shape_quantizer_short_prediction_neon() and save
+          half the multiplies, but the speed difference is not large, since we
+          then need two extra adds.*/
+        int64x2_t b0 = vmull_s32(vget_low_s32(a0), vget_low_s32(coef0));
+        int64x2_t b1 = vmlal_s32(b0, vget_high_s32(a0), vget_high_s32(coef0));
+        int64x2_t b2 = vmlal_s32(b1, vget_low_s32(a1), vget_low_s32(coef1));
+        int64x2_t b3 = vmlal_s32(b2, vget_high_s32(a1), vget_high_s32(coef1));
+
+        int64x1_t c = vadd_s64(vget_low_s64(b3), vget_high_s64(b3));
+        int64x1_t cS = vrshr_n_s64(c, 15);
+        int32x2_t d = vreinterpret_s32_s64(cS);
+
+        out = vget_lane_s32(d, 0);
+        vst1q_s32(data1, a0);
+        vst1q_s32(data1 + 4, a1);
+        return out;
+    }
+    return silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order);
+}
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/NSQ_neon.h
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (C) 2014 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef SILK_NSQ_NEON_H
+#define SILK_NSQ_NEON_H
+
+#include "cpu_support.h"
+
+#undef silk_short_prediction_create_arch_coef
+/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */
+static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order)
+{
+    out[15] = in[0] << 15;
+    out[14] = in[1] << 15;
+    out[13] = in[2] << 15;
+    out[12] = in[3] << 15;
+    out[11] = in[4] << 15;
+    out[10] = in[5] << 15;
+    out[9]  = in[6] << 15;
+    out[8]  = in[7] << 15;
+    out[7]  = in[8] << 15;
+    out[6]  = in[9] << 15;
+
+    if (order == 16)
+    {
+        out[5] = in[10] << 15;
+        out[4] = in[11] << 15;
+        out[3] = in[12] << 15;
+        out[2] = in[13] << 15;
+        out[1] = in[14] << 15;
+        out[0] = in[15] << 15;
+    }
+    else
+    {
+        out[5] = 0;
+        out[4] = 0;
+        out[3] = 0;
+        out[2] = 0;
+        out[1] = 0;
+        out[0] = 0;
+    }
+}
+
+#if defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+#define silk_short_prediction_create_arch_coef(out, in, order) \
+    (silk_short_prediction_create_arch_coef_neon(out, in, order))
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+
+#define silk_short_prediction_create_arch_coef(out, in, order) \
+    do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0)
+
+#endif
+
+opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order);
+
+opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order);
+
+#if defined(OPUS_ARM_PRESUME_NEON_INTR)
+#undef silk_noise_shape_quantizer_short_prediction
+#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
+    ((void)arch,silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order))
+
+#undef silk_NSQ_noise_shape_feedback_loop
+#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch)  ((void)arch,silk_NSQ_noise_shape_feedback_loop_neon(data0, data1, coef, order))
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+
+/* silk_noise_shape_quantizer_short_prediction implementations take different parameters based on arch
+   (coef vs. coefRev) so can't use the usual IMPL table implementation */
+#undef silk_noise_shape_quantizer_short_prediction
+#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch)  \
+    (arch == OPUS_ARCH_ARM_NEON ? \
+        silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \
+        silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
+
+extern opus_int32
+ (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
+ const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
+ opus_int order);
+
+#undef silk_NSQ_noise_shape_feedback_loop
+#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) \
+ (SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[(arch)&OPUS_ARCHMASK](data0, data1, \
+ coef, order))
+
+#endif
+
+#endif /* SILK_NSQ_NEON_H */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/arm_silk_map.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (C) 2014 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "NSQ.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
+ !defined(OPUS_ARM_PRESUME_NEON_INTR))
+
+/*There is no table for silk_noise_shape_quantizer_short_prediction because the
+   NEON version takes different parameters than the C version.
+  Instead RTCD is done via if statements at the call sites.
+  See NSQ_neon.h for details.*/
+
+opus_int32
+ (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
+ const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
+ opus_int order) = {
+  silk_NSQ_noise_shape_feedback_loop_c,    /* ARMv4 */
+  silk_NSQ_noise_shape_feedback_loop_c,    /* EDSP */
+  silk_NSQ_noise_shape_feedback_loop_c,    /* Media */
+  silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */
+};
+
+# endif
+
+#endif /* OPUS_HAVE_RTCD */
new file mode 100644
--- /dev/null
+++ b/media/libopus/silk/arm/macros_arm64.h
@@ -0,0 +1,39 @@
+/***********************************************************************
+Copyright (C) 2015 Vidyo
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARM64_H
+#define SILK_MACROS_ARM64_H
+
+#include <arm_neon.h>
+
+#undef silk_ADD_SAT32
+#define silk_ADD_SAT32(a, b) (vqadds_s32((a), (b)))
+
+#undef silk_SUB_SAT32
+#define silk_SUB_SAT32(a, b) (vqsubs_s32((a), (b)))
+
+#endif /* SILK_MACROS_ARM64_H */
--- a/media/libopus/silk/decode_core.c
+++ b/media/libopus/silk/decode_core.c
@@ -214,17 +214,17 @@ void silk_decode_core(
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] );
                 LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] );
             }
 
             /* Add prediction to LPC excitation */
-            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
+            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
 
             /* Scale with gain */
             pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
         }
 
         /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */
 
         /* Update LPC filter state */
--- a/media/libopus/silk/fixed/burg_modified_FIX.c
+++ b/media/libopus/silk/fixed/burg_modified_FIX.c
@@ -145,18 +145,21 @@ void silk_burg_modified_c(
                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
                 for( k = 0; k < n; k++ ) {
                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
-                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
-                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                    /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32),
+                       but they cancel each other and the real result seems to always fit in a 32-bit
+                       signed integer. This was determined experimentally, not theoretically (unfortunately). */
+                    tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                 }
                 tmp1 = -tmp1;                                                                           /* Q17 */
                 tmp2 = -tmp2;                                                                           /* Q17 */
                 for( k = 0; k <= n; k++ ) {
                     CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
                         silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
                     CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
                         silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
@@ -195,22 +198,24 @@ void silk_burg_modified_c(
 
         /* Update inverse prediction gain */
         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
         if( tmp1 <= minInvGain_Q30 ) {
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
-            /* Newton-Raphson iteration */
-            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
-            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
-            if( num < 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
-                rc_Q31 = -rc_Q31;
+            if( rc_Q31 > 0 ) {
+                /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                       /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                                   /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure adjusted reflection coefficients has the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
             }
             invGain_Q30 = minInvGain_Q30;
             reached_max_gain = 1;
         } else {
             invGain_Q30 = tmp1;
         }
 
         /* Update the AR coefficients */
--- a/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
+++ b/media/libopus/silk/fixed/x86/burg_modified_FIX_sse.c
@@ -295,22 +295,24 @@ void silk_burg_modified_sse4_1(
 
         /* Update inverse prediction gain */
         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
         if( tmp1 <= minInvGain_Q30 ) {
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
-            /* Newton-Raphson iteration */
-            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
-            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
-            if( num < 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
-                rc_Q31 = -rc_Q31;
+            if( rc_Q31 > 0 ) {
+                 /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure adjusted reflection coefficients has the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
             }
             invGain_Q30 = minInvGain_Q30;
             reached_max_gain = 1;
         } else {
             invGain_Q30 = tmp1;
         }
 
         /* Update the AR coefficients */
--- a/media/libopus/silk/macros.h
+++ b/media/libopus/silk/macros.h
@@ -29,50 +29,52 @@ POSSIBILITY OF SUCH DAMAGE.
 #define SILK_MACROS_H
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "opus_types.h"
 #include "opus_defines.h"
+#include "arch.h"
 
 #if OPUS_GNUC_PREREQ(3, 0)
 #define opus_likely(x)       (__builtin_expect(!!(x), 1))
 #define opus_unlikely(x)     (__builtin_expect(!!(x), 0))
 #else
 #define opus_likely(x)       (!!(x))
 #define opus_unlikely(x)     (!!(x))
 #endif
 
-/* Set this if opus_int64 is a native type of the CPU. */
-#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64))
-
 /* This is an OPUS_INLINE header file for general platform. */
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #if OPUS_FAST_INT64
-#define silk_SMULWB(a32, b32)            (((a32) * (opus_int64)((opus_int16)(b32))) >> 16)
+#define silk_SMULWB(a32, b32)            ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16))
 #else
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
 #endif
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #if OPUS_FAST_INT64
-#define silk_SMLAWB(a32, b32, c32)       ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))
+#define silk_SMLAWB(a32, b32, c32)       ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)))
 #else
 #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
 #endif
 
 /* (a32 * (b32 >> 16)) >> 16 */
+#if OPUS_FAST_INT64
+#define silk_SMULWT(a32, b32)            ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16))
+#else
 #define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
+#endif
 
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 #if OPUS_FAST_INT64
-#define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))
+#define silk_SMLAWT(a32, b32, c32)       ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)))
 #else
 #define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
 #endif
 
 /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
 #define silk_SMULBB(a32, b32)            ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)))
 
 /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
@@ -84,24 +86,24 @@ POSSIBILITY OF SUCH DAMAGE.
 /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
 #define silk_SMLABT(a32, b32, c32)       ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16))
 
 /* a64 + (b32 * c32) */
 #define silk_SMLAL(a64, b32, c32)        (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32))))
 
 /* (a32 * b32) >> 16 */
 #if OPUS_FAST_INT64
-#define silk_SMULWW(a32, b32)            (((opus_int64)(a32) * (b32)) >> 16)
+#define silk_SMULWW(a32, b32)            ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16))
 #else
 #define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
 #endif
 
 /* a32 + ((b32 * c32) >> 16) */
 #if OPUS_FAST_INT64
-#define silk_SMLAWW(a32, b32, c32)       ((a32) + (((opus_int64)(b32) * (c32)) >> 16))
+#define silk_SMLAWW(a32, b32, c32)       ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16)))
 #else
 #define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
 #endif
 
 /* add/subtract with output saturated */
 #define silk_ADD_SAT32(a, b)             ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ?                              \
                                         ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) :   \
                                         ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
@@ -144,10 +146,14 @@ static OPUS_INLINE opus_int32 silk_CLZ32
 #ifdef OPUS_ARM_INLINE_ASM
 #include "arm/macros_armv4.h"
 #endif
 
 #ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/macros_armv5e.h"
 #endif
 
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/macros_arm64.h"
+#endif
+
 #endif /* SILK_MACROS_H */
 
--- a/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
+++ b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
@@ -57,17 +57,18 @@ static inline void silk_noise_shape_quan
     opus_int            offset_Q10,             /* I                                        */
     opus_int            length,                 /* I    Input length                        */
     opus_int            subfr,                  /* I    Subframe number                     */
     opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
     opus_int            predictLPCOrder,        /* I    Prediction filter order             */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 )
 {
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
     opus_int32   Winner_rand_state;
     opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
     opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
@@ -77,16 +78,19 @@ static inline void silk_noise_shape_quan
     NSQ_sample_struct  *psSS;
     opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4;
     opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6;
     opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13;
     opus_int16 a_Q12_14, a_Q12_15;
 
     opus_int32 cur, prev, next;
 
+    /*Unused.*/
+    (void)arch;
+
     //Intialize b_Q14 variables
     b_Q14_0 = b_Q14[ 0 ];
     b_Q14_1 = b_Q14[ 1 ];
     b_Q14_2 = b_Q14[ 2 ];
     b_Q14_3 = b_Q14[ 3 ];
     b_Q14_4 = b_Q14[ 4 ];
 
     //Intialize a_Q12 variables
--- a/media/libopus/silk/process_NLSFs.c
+++ b/media/libopus/silk/process_NLSFs.c
@@ -36,17 +36,17 @@ void silk_process_NLSFs(
     silk_encoder_state          *psEncC,                            /* I/O  Encoder state                               */
     opus_int16                  PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O    Prediction coefficients                     */
     opus_int16                  pNLSF_Q15[         MAX_LPC_ORDER ], /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
     const opus_int16            prev_NLSFq_Q15[    MAX_LPC_ORDER ]  /* I    Previous Normalized LSFs (0 - (2^15-1))     */
 )
 {
     opus_int     i, doInterpolate;
     opus_int     NLSF_mu_Q20;
-    opus_int32   i_sqr_Q15;
+    opus_int16   i_sqr_Q15;
     opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
 
     silk_assert( psEncC->speech_activity_Q8 >=   0 );
     silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) );
     silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) );
 
@@ -74,17 +74,18 @@ void silk_process_NLSFs(
             psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
 
         /* Calculate first half NLSF weights for the interpolated NLSFs */
         silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder );
 
         /* Update NLSF weights with contribution from first half */
         i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
         for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
-            pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 );
+            pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT(
+                  silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) );
             silk_assert( pNLSFW_QW[ i ] >= 1 );
         }
     }
 
     silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW,
         NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType );
 
     /* Convert quantized NLSFs back to LPC coefficients */
@@ -95,11 +96,12 @@ void silk_process_NLSFs(
         silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
             psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
 
         /* Convert back to LPC coefficients */
         silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder );
 
     } else {
         /* Copy LPC coefficients for first half from second half */
+        silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER );
         silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
     }
 }
--- a/media/libopus/silk/sort.c
+++ b/media/libopus/silk/sort.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 /* Insertion sort (fast for already almost sorted arrays):   */
 /* Best case:  O(n)   for an already sorted array            */
 /* Worst case: O(n^2) for an inversely sorted array          */
 /*                                                           */
-/* Shell short:    http://en.wikipedia.org/wiki/Shell_sort   */
+/* Shell short:    https://en.wikipedia.org/wiki/Shell_sort  */
 
 #include "SigProc_FIX.h"
 
 void silk_insertion_sort_increasing(
     opus_int32           *a,             /* I/O   Unsorted / Sorted vector               */
     opus_int             *idx,           /* O     Index vector for the sorted elements   */
     const opus_int       L,              /* I     Vector length                          */
     const opus_int       K               /* I     Number of correctly sorted positions   */
--- a/media/libopus/silk/stereo_LR_to_MS.c
+++ b/media/libopus/silk/stereo_LR_to_MS.c
@@ -72,26 +72,26 @@ void silk_stereo_LR_to_MS(
     silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
     silk_memcpy( state->sMid,  &mid[  frame_length ], 2 * sizeof( opus_int16 ) );
     silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
 
     /* LP and HP filter mid signal */
     ALLOC( LP_mid, frame_length, opus_int16 );
     ALLOC( HP_mid, frame_length, opus_int16 );
     for( n = 0; n < frame_length; n++ ) {
-        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
         LP_mid[ n ] = sum;
         HP_mid[ n ] = mid[ n + 1 ] - sum;
     }
 
     /* LP and HP filter side signal */
     ALLOC( LP_side, frame_length, opus_int16 );
     ALLOC( HP_side, frame_length, opus_int16 );
     for( n = 0; n < frame_length; n++ ) {
-        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
         LP_side[ n ] = sum;
         HP_side[ n ] = side[ n + 1 ] - sum;
     }
 
     /* Find energies and predictors */
     is10msFrame = frame_length == 10 * fs_kHz;
     smooth_coef_Q16 = is10msFrame ?
         SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
@@ -202,27 +202,27 @@ void silk_stereo_LR_to_MS(
     denom_Q16  = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
     delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
     delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
     deltaw_Q24 =  silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
     for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
         pred0_Q13 += delta0_Q13;
         pred1_Q13 += delta1_Q13;
         w_Q24   += deltaw_Q24;
-        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
     }
 
     pred0_Q13 = -pred_Q13[ 0 ];
     pred1_Q13 = -pred_Q13[ 1 ];
     w_Q24     =  silk_LSHIFT( width_Q14, 10 );
     for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
-        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
     }
     state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
     state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
     state->width_prev_Q14     = (opus_int16)width_Q14;
     RESTORE_STACK;
--- a/media/libopus/silk/x86/NSQ_sse.c
+++ b/media/libopus/silk/x86/NSQ_sse.c
@@ -216,17 +216,17 @@ void silk_NSQ_sse4_1(
             silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
                 AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ],
                 offset_Q10, psEncC->subfr_length, &(table[32]) );
         }
         else
         {
             silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
                 AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
-                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
         }
 
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pxq    += psEncC->subfr_length;
     }
 
     /* Update lagPrev for next frame */
--- a/media/libopus/silk/x86/main_sse.h
+++ b/media/libopus/silk/x86/main_sse.h
@@ -202,17 +202,18 @@ void silk_noise_shape_quantizer(
     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
     opus_int32          LF_shp_Q14,             /* I                                    */
     opus_int32          Gain_Q16,               /* I                                    */
     opus_int            Lambda_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 );
 
 /**************************/
 /* Noise level estimation */
 /**************************/
 void silk_VAD_GetNoiseLevels(
     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -158,16 +158,21 @@ silk_sources = [
 silk_sources_sse4_1 = [
     'silk/x86/NSQ_del_dec_sse.c',
     'silk/x86/NSQ_sse.c',
     'silk/x86/VAD_sse.c',
     'silk/x86/VQ_WMat_EC_sse.c',
     'silk/x86/x86_silk_map.c',
 ]
 
+silk_sources_arm_neon_intr = [
+    'silk/arm/arm_silk_map.c',
+    'silk/arm/NSQ_neon.c',
+]
+
 silk_sources_fixed = [
     'silk/fixed/apply_sine_window_FIX.c',
     'silk/fixed/autocorr_FIX.c',
     'silk/fixed/burg_modified_FIX.c',
     'silk/fixed/corrMatrix_FIX.c',
     'silk/fixed/encode_frame_FIX.c',
     'silk/fixed/find_LPC_FIX.c',
     'silk/fixed/find_LTP_FIX.c',
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -535,27 +535,24 @@ static void tonality_analysis(TonalityAn
        float p0, p1;
        /* Probabilities for "all speech" and "all music" */
        float s0, m0;
        /* Probability sum for renormalisation */
        float psum;
        /* Instantaneous probability of speech and music, with beta pre-applied. */
        float speech0;
        float music0;
+       float p, q;
 
        /* One transition every 3 minutes of active audio */
        tau = .00005f*frame_probs[1];
-       beta = .05f;
-       if (1) {
-          /* Adapt beta based on how "unexpected" the new prob is */
-          float p, q;
-          p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
-          q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
-          beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-       }
+       /* Adapt beta based on how "unexpected" the new prob is */
+       p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+       q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+       beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
        /* p0 and p1 are the probabilities of speech and music at this frame
           using only information from previous frame and applying the
           state transition model */
        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
        /* We apply the current probability with exponent beta to work around
           the fact that the probability estimates aren't independent. */
        p0 *= (float)pow(1-frame_probs[0], beta);
--- a/media/libopus/src/opus.c
+++ b/media/libopus/src/opus.c
@@ -99,16 +99,20 @@ OPUS_EXPORT void opus_pcm_soft_clip(floa
             }
             end++;
          }
          /* Detect the special case where we clip before the first zero crossing */
          special = (start==0 && x[i*C]*x[0]>=0);
 
          /* Compute a such that maxval + a*maxval^2 = 1 */
          a=(maxval-1)/(maxval*maxval);
+         /* Slightly boost "a" by 2^-22. This is just enough to ensure -ffast-math
+            does not cause output values larger than +/-1, but small enough not
+            to matter even for 24-bit output.  */
+         a += a*2.4e-7;
          if (x[i*C]>0)
             a = -a;
          /* Apply soft clipping */
          for (i=start;i<end;i++)
             x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
 
          if (special && peak_pos>=2)
          {
@@ -196,18 +200,20 @@ int opus_packet_parse_impl(const unsigne
    int count;
    int cbr;
    unsigned char ch, toc;
    int framesize;
    opus_int32 last_size;
    opus_int32 pad = 0;
    const unsigned char *data0 = data;
 
-   if (size==NULL)
+   if (size==NULL || len<0)
       return OPUS_BAD_ARG;
+   if (len==0)
+      return OPUS_INVALID_PACKET;
 
    framesize = opus_packet_get_samples_per_frame(data, 48000);
 
    cbr = 0;
    toc = *data++;
    len--;
    last_size = len;
    switch (toc&0x3)
--- a/media/libopus/src/opus_encoder.c
+++ b/media/libopus/src/opus_encoder.c
@@ -855,30 +855,30 @@ opus_int32 compute_frame_size(const void
    }
    if (frame_size<0)
       return -1;
    return frame_size;
 }
 
 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
 {
-   opus_val16 corr;
-   opus_val16 ldiff;
-   opus_val16 width;
    opus_val32 xx, xy, yy;
    opus_val16 sqrt_xx, sqrt_yy;
    opus_val16 qrrt_xx, qrrt_yy;
    int frame_rate;
    int i;
    opus_val16 short_alpha;
 
    frame_rate = Fs/frame_size;
-   short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate);
+   short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate);
    xx=xy=yy=0;
-   for (i=0;i<frame_size;i+=4)
+   /* Unroll by 4. The frame size is always a multiple of 4 *except* for
+      2.5 ms frames at 12 kHz. Since this setting is very rare (and very
+      stupid), we just discard the last two samples. */
+   for (i=0;i<frame_size-3;i+=4)
    {
       opus_val32 pxx=0;
       opus_val32 pxy=0;
       opus_val32 pyy=0;
       opus_val16 x, y;
       x = pcm[2*i];
       y = pcm[2*i+1];
       pxx = SHR32(MULT16_16(x,x),2);
@@ -907,37 +907,36 @@ opus_val16 compute_stereo_width(const op
    mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
    mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
    mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
    mem->XX = MAX32(0, mem->XX);
    mem->XY = MAX32(0, mem->XY);
    mem->YY = MAX32(0, mem->YY);
    if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
    {
+      opus_val16 corr;
+      opus_val16 ldiff;
+      opus_val16 width;
       sqrt_xx = celt_sqrt(mem->XX);
       sqrt_yy = celt_sqrt(mem->YY);
       qrrt_xx = celt_sqrt(sqrt_xx);
       qrrt_yy = celt_sqrt(sqrt_yy);
       /* Inter-channel correlation */
       mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
       corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
       /* Approximate loudness difference */
-      ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy);
+      ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy);
       width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
       /* Smoothing over one second */
       mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
       /* Peak follower */
       mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
-   } else {
-      width = 0;
-      corr=Q15ONE;
-      ldiff=0;
    }
    /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/
-   return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower));
+   return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower)));
 }
 
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
                 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
                 const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
                 int analysis_channels, downmix_func downmix, int float_api)
 {
     void *silk_enc;
@@ -1045,44 +1044,54 @@ opus_int32 opus_encode_native(OpusEncode
     if (st->channels==2 && st->force_channels!=1)
        stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
     else
        stereo_width = 0;
     total_buffer = delay_compensation;
     st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
 
     frame_rate = st->Fs/frame_size;
+    if (!st->use_vbr)
+    {
+       int cbrBytes;
+       /* Multiply by 3 to make sure the division is exact. */
+       int frame_rate3 = 3*st->Fs/frame_size;
+       /* We need to make sure that "int" values always fit in 16 bits. */
+       cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes);
+       st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3;
+       max_data_bytes = cbrBytes;
+    }
     if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
        || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
     {
        /*If the space is too low to do something useful, emit 'PLC' frames.*/
        int tocmode = st->mode;
        int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
        if (tocmode==0)
           tocmode = MODE_SILK_ONLY;
        if (frame_rate>100)
           tocmode = MODE_CELT_ONLY;
        if (frame_rate < 50)
           tocmode = MODE_SILK_ONLY;
        if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
           bw=OPUS_BANDWIDTH_WIDEBAND;
        else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
           bw=OPUS_BANDWIDTH_NARROWBAND;
-       else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
+       else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
           bw=OPUS_BANDWIDTH_SUPERWIDEBAND;
        data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
+       ret = 1;
+       if (!st->use_vbr)
+       {
+          ret = opus_packet_pad(data, ret, max_data_bytes);
+          if (ret == OPUS_OK)
+             ret = max_data_bytes;
+       }
        RESTORE_STACK;
-       return 1;
-    }
-    if (!st->use_vbr)
-    {
-       int cbrBytes;
-       cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes);
-       st->bitrate_bps = cbrBytes * (8*frame_rate);
-       max_data_bytes = cbrBytes;
+       return ret;
     }
     max_rate = frame_rate*max_data_bytes*8;
 
     /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
     equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50);
 
     if (st->signal_type == OPUS_SIGNAL_VOICE)
        voice_est = 127;
@@ -1508,17 +1517,17 @@ opus_int32 opus_encode_native(OpusEncode
                 st->silk_mode.bitRate = total_bitRate * 4/5;
             }
             if (!st->energy_masking)
             {
                /* Increasingly attenuate high band when it gets allocated fewer bits */
                celt_rate = total_bitRate - st->silk_mode.bitRate;
                HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600;
                HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6);
-               HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
+               HB_gain = HB_gain < (opus_val32)Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
             }
         } else {
             /* SILK gets all bits */
             st->silk_mode.bitRate = total_bitRate;
         }
 
         /* Surround masking for SILK */
         if (st->energy_masking && st->use_vbr && !st->lfe)
--- a/media/libopus/src/opus_multistream_encoder.c
+++ b/media/libopus/src/opus_multistream_encoder.c
@@ -65,23 +65,32 @@ typedef void (*opus_copy_channel_in_func
   opus_val16 *dst,
   int dst_stride,
   const void *src,
   int src_stride,
   int src_channel,
   int frame_size
 );
 
+typedef enum {
+  MAPPING_TYPE_NONE,
+  MAPPING_TYPE_SURROUND
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+  ,  /* Do not include comma at end of enumerator list */
+  MAPPING_TYPE_AMBISONICS
+#endif
+} MappingType;
+
 struct OpusMSEncoder {
    ChannelLayout layout;
    int arch;
    int lfe_stream;
    int application;
    int variable_duration;
-   int surround;
+   MappingType mapping_type;
    opus_int32 bitrate_bps;
    float subframe_mem[3];
    /* Encoder states go here */
    /* then opus_val32 window_mem[channels*120]; */
    /* then opus_val32 preemph_mem[channels]; */
 };
 
 static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
@@ -237,16 +246,17 @@ void surround_analysis(const CELTMode *c
    VARDECL(opus_val32, in);
    VARDECL(opus_val16, x);
    VARDECL(opus_val32, freq);
    SAVE_STACK;
 
    upsample = resampling_factor(rate);
    frame_size = len*upsample;
 
+   /* LM = log2(frame_size / 120) */
    for (LM=0;LM<celt_mode->maxLM;LM++)
       if (celt_mode->shortMdctSize<<LM==frame_size)
          break;
 
    ALLOC(in, frame_size+overlap, opus_val32);
    ALLOC(x, len, opus_val16);
    ALLOC(freq, frame_size, opus_val32);
 
@@ -393,53 +403,58 @@ opus_int32 opus_multistream_surround_enc
    } else if (mapping_family==1 && channels<=8 && channels>=1)
    {
       nb_streams=vorbis_mappings[channels-1].nb_streams;
       nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
    } else if (mapping_family==255)
    {
       nb_streams=channels;
       nb_coupled_streams=0;
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   } else if (mapping_family==254)
+   {
+      nb_streams=channels;
+      nb_coupled_streams=0;
+#endif
    } else
       return 0;
    size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
    if (channels>2)
    {
       size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
    }
    return size;
 }
 
-
 static int opus_multistream_encoder_init_impl(
       OpusMSEncoder *st,
       opus_int32 Fs,
       int channels,
       int streams,
       int coupled_streams,
       const unsigned char *mapping,
       int application,
-      int surround
+      MappingType mapping_type
 )
 {
    int coupled_size;
    int mono_size;
    int i, ret;
    char *ptr;
 
    if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
        (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
    st->arch = opus_select_arch();
    st->layout.nb_channels = channels;
    st->layout.nb_streams = streams;
    st->layout.nb_coupled_streams = coupled_streams;
    st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
-   if (!surround)
+   if (mapping_type != MAPPING_TYPE_SURROUND)
       st->lfe_stream = -1;
    st->bitrate_bps = OPUS_AUTO;
    st->application = application;
    st->variable_duration = OPUS_FRAMESIZE_ARG;
    for (i=0;i<st->layout.nb_channels;i++)
       st->layout.mapping[i] = mapping[i];
    if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
       return OPUS_BAD_ARG;
@@ -458,49 +473,53 @@ static int opus_multistream_encoder_init
    for (;i<st->layout.nb_streams;i++)
    {
       ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application);
       if (i==st->lfe_stream)
          opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
       if(ret!=OPUS_OK)return ret;
       ptr += align(mono_size);
    }
-   if (surround)
+   if (mapping_type == MAPPING_TYPE_SURROUND)
    {
       OPUS_CLEAR(ms_get_preemph_mem(st), channels);
       OPUS_CLEAR(ms_get_window_mem(st), channels*120);
    }
-   st->surround = surround;
+   st->mapping_type = mapping_type;
    return OPUS_OK;
 }
 
 int opus_multistream_encoder_init(
       OpusMSEncoder *st,
       opus_int32 Fs,
       int channels,
       int streams,
       int coupled_streams,
       const unsigned char *mapping,
       int application
 )
 {
-   return opus_multistream_encoder_init_impl(st, Fs, channels, streams, coupled_streams, mapping, application, 0);
+   return opus_multistream_encoder_init_impl(st, Fs, channels, streams,
+                                             coupled_streams, mapping,
+                                             application, MAPPING_TYPE_NONE);
 }
 
 int opus_multistream_surround_encoder_init(
       OpusMSEncoder *st,
       opus_int32 Fs,
       int channels,
       int mapping_family,
       int *streams,
       int *coupled_streams,
       unsigned char *mapping,
       int application
 )
 {
+   MappingType mapping_type;
+
    if ((channels>255) || (channels<1))
       return OPUS_BAD_ARG;
    st->lfe_stream = -1;
    if (mapping_family==0)
    {
       if (channels==1)
       {
          *streams=1;
@@ -525,20 +544,42 @@ int opus_multistream_surround_encoder_in
          st->lfe_stream = *streams-1;
    } else if (mapping_family==255)
    {
       int i;
       *streams=channels;
       *coupled_streams=0;
       for(i=0;i<channels;i++)
          mapping[i] = i;
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   } else if (mapping_family==254)
+   {
+      int i;
+      *streams=channels;
+      *coupled_streams=0;
+      for(i=0;i<channels;i++)
+         mapping[i] = i;
+#endif
    } else
       return OPUS_UNIMPLEMENTED;
-   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, *coupled_streams,
-         mapping, application, channels>2&&mapping_family==1);
+
+   if (channels>2 && mapping_family==1) {
+      mapping_type = MAPPING_TYPE_SURROUND;
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   } else if (mapping_family==254)
+   {
+      mapping_type = MAPPING_TYPE_AMBISONICS;
+#endif
+   } else
+   {
+      mapping_type = MAPPING_TYPE_NONE;
+   }
+   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams,
+                                             *coupled_streams, mapping,
+                                             application, mapping_type);
 }
 
 OpusMSEncoder *opus_multistream_encoder_create(
       opus_int32 Fs,
       int channels,
       int streams,
       int coupled_streams,
       const unsigned char *mapping,
@@ -613,34 +654,29 @@ OpusMSEncoder *opus_multistream_surround
       opus_free(st);
       st = NULL;
    }
    if (error)
       *error = ret;
    return st;
 }
 
-static opus_int32 surround_rate_allocation(
+static void surround_rate_allocation(
       OpusMSEncoder *st,
       opus_int32 *rate,
-      int frame_size
+      int frame_size,
+      opus_int32 Fs
       )
 {
    int i;
    opus_int32 channel_rate;
-   opus_int32 Fs;
-   char *ptr;
    int stream_offset;
    int lfe_offset;
    int coupled_ratio; /* Q8 */
    int lfe_ratio;     /* Q8 */
-   opus_int32 rate_sum=0;
-
-   ptr = (char*)st + align(sizeof(OpusMSEncoder));
-   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
 
    if (st->bitrate_bps > st->layout.nb_channels*40000)
       stream_offset = 20000;
    else
       stream_offset = st->bitrate_bps/st->layout.nb_channels/2;
    stream_offset += 60*(Fs/frame_size-50);
    /* We start by giving each stream (coupled or uncoupled) the same bitrate.
       This models the main saving of coupled channels over uncoupled. */
@@ -683,16 +719,98 @@ static opus_int32 surround_rate_allocati
    for (i=0;i<st->layout.nb_streams;i++)
    {
       if (i<st->layout.nb_coupled_streams)
          rate[i] = stream_offset+(channel_rate*coupled_ratio>>8);
       else if (i!=st->lfe_stream)
          rate[i] = stream_offset+channel_rate;
       else
          rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+   }
+}
+
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+static void ambisonics_rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size,
+      opus_int32 Fs
+      )
+{
+   int i;
+   int non_mono_rate;
+   int total_rate;
+
+   /* The mono channel gets (rate_ratio_num / rate_ratio_den) times as many bits
+    * as all other channels */
+   const int rate_ratio_num = 4;
+   const int rate_ratio_den = 3;
+   const int num_channels = st->layout.nb_streams;
+
+   if (st->bitrate_bps==OPUS_AUTO)
+   {
+      total_rate = num_channels * (20000 + st->layout.nb_streams*(Fs+60*Fs/frame_size));
+   } else if (st->bitrate_bps==OPUS_BITRATE_MAX)
+   {
+      total_rate = num_channels * 320000;
+   } else {
+      total_rate = st->bitrate_bps;
+   }
+
+   /* Let y be the non-mono rate and let p, q be integers such that the mono
+    * channel rate is (p/q) * y.
+    * Also let T be the total bitrate to allocate. Then
+    *   (n - 1) y + (p/q) y = T
+    *   y = (T q) / (qn - q + p)
+    */
+   non_mono_rate =
+         total_rate * rate_ratio_den
+         / (rate_ratio_den*num_channels + rate_ratio_num - rate_ratio_den);
+
+#ifndef FIXED_POINT
+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+   {
+      opus_int32 bonus = 60*(Fs/frame_size-50);
+      non_mono_rate += bonus;
+   }
+#endif
+
+   rate[0] = total_rate - (num_channels - 1) * non_mono_rate;
+   for (i=1;i<st->layout.nb_streams;i++)
+   {
+      rate[i] = non_mono_rate;
+   }
+}
+#endif /* ENABLE_EXPERIMENTAL_AMBISONICS */
+
+static opus_int32 rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size
+      )
+{
+   int i;
+   opus_int32 rate_sum=0;
+   opus_int32 Fs;
+   char *ptr;
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+   if (st->mapping_type == MAPPING_TYPE_AMBISONICS) {
+     ambisonics_rate_allocation(st, rate, frame_size, Fs);
+   } else
+#endif
+   {
+     surround_rate_allocation(st, rate, frame_size, Fs);
+   }
+
+   for (i=0;i<st->layout.nb_streams;i++)
+   {
       rate[i] = IMAX(rate[i], 500);
       rate_sum += rate[i];
    }
    return rate_sum;
 }
 
 /* Max size in case the encoder decides to return three frames */
 #define MS_FRAME_TMP (3*1275+7)
@@ -725,17 +843,17 @@ static int opus_multistream_encode_nativ
    opus_val16 bandLogE[42];
    opus_val32 *mem = NULL;
    opus_val32 *preemph_mem=NULL;
    int frame_size;
    opus_int32 rate_sum;
    opus_int32 smallest_packet;
    ALLOC_STACK;
 
-   if (st->surround)
+   if (st->mapping_type == MAPPING_TYPE_SURROUND)
    {
       preemph_mem = ms_get_preemph_mem(st);
       mem = ms_get_window_mem(st);
    }
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr));
@@ -779,23 +897,23 @@ static int opus_multistream_encode_nativ
       RESTORE_STACK;
       return OPUS_BUFFER_TOO_SMALL;
    }
    ALLOC(buf, 2*frame_size, opus_val16);
    coupled_size = opus_encoder_get_size(2);
    mono_size = opus_encoder_get_size(1);
 
    ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
-   if (st->surround)
+   if (st->mapping_type == MAPPING_TYPE_SURROUND)
    {
       surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
    }
 
    /* Compute bitrate allocation between streams (this could be a lot better) */
-   rate_sum = surround_rate_allocation(st, bitrates, frame_size);
+   rate_sum = rate_allocation(st, bitrates, frame_size);
 
    if (!vbr)
    {
       if (st->bitrate_bps == OPUS_AUTO)
       {
          max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size));
       } else if (st->bitrate_bps != OPUS_BITRATE_MAX)
       {
@@ -808,17 +926,17 @@ static int opus_multistream_encode_nativ
    {
       OpusEncoder *enc;
       enc = (OpusEncoder*)ptr;
       if (s < st->layout.nb_coupled_streams)
          ptr += align(coupled_size);
       else
          ptr += align(mono_size);
       opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
-      if (st->surround)
+      if (st->mapping_type == MAPPING_TYPE_SURROUND)
       {
          opus_int32 equiv_rate;
          equiv_rate = st->bitrate_bps;
          if (frame_size*50 < Fs)
             equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
          if (equiv_rate > 10000*st->layout.nb_channels)
             opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
          else if (equiv_rate > 7000*st->layout.nb_channels)
@@ -829,66 +947,72 @@ static int opus_multistream_encode_nativ
             opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
          if (s < st->layout.nb_coupled_streams)
          {
             /* To preserve the spatial image, force stereo CELT on coupled streams */
             opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
             opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
          }
       }
+#ifdef ENABLE_EXPERIMENTAL_AMBISONICS
+      else if (st->mapping_type == MAPPING_TYPE_AMBISONICS) {
+        opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+      }
+#endif
    }
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    /* Counting ToC */
    tot_size = 0;
    for (s=0;s<st->layout.nb_streams;s++)
    {
       OpusEncoder *enc;
       int len;
       int curr_max;
       int c1, c2;
+      int ret;
 
       opus_repacketizer_init(&rp);
       enc = (OpusEncoder*)ptr;
       if (s < st->layout.nb_coupled_streams)
       {
          int i;
          int left, right;
          left = get_left_channel(&st->layout, s, -1);
          right = get_right_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 2,
             pcm, st->layout.nb_channels, left, frame_size);
          (*copy_channel_in)(buf+1, 2,
             pcm, st->layout.nb_channels, right, frame_size);
          ptr += align(coupled_size);
-         if (st->surround)
+         if (st->mapping_type == MAPPING_TYPE_SURROUND)
          {
             for (i=0;i<21;i++)
             {
                bandLogE[i] = bandSMR[21*left+i];
                bandLogE[21+i] = bandSMR[21*right+i];
             }
          }
          c1 = left;
          c2 = right;
       } else {
          int i;
          int chan = get_mono_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 1,
             pcm, st->layout.nb_channels, chan, frame_size);
          ptr += align(mono_size);
-         if (st->surround)
+         if (st->mapping_type == MAPPING_TYPE_SURROUND)
          {
             for (i=0;i<21;i++)
                bandLogE[i] = bandSMR[21*chan+i];
          }
          c1 = chan;
          c2 = -1;
       }
-      if (st->surround)
+      if (st->mapping_type == MAPPING_TYPE_SURROUND)
          opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
       /* number of bytes left (+Toc) */
       curr_max = max_data_bytes - tot_size;
       /* Reserve one byte for the last stream and two for the others */
       curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1);
       curr_max = IMIN(curr_max,MS_FRAME_TMP);
       /* Repacketizer will add one or two bytes for self-delimited frames */
       if (s != st->layout.nb_streams-1) curr_max -=  curr_max>253 ? 2 : 1;
@@ -899,17 +1023,24 @@ static int opus_multistream_encode_nativ
       if (len<0)
       {
          RESTORE_STACK;
          return len;
       }
       /* We need to use the repacketizer to add the self-delimiting lengths
          while taking into account the fact that the encoder can now return
          more than one frame at a time (e.g. 60 ms CELT-only) */
-      opus_repacketizer_cat(&rp, tmp_data, len);
+      ret = opus_repacketizer_cat(&rp, tmp_data, len);
+      /* If the opus_repacketizer_cat() fails, then something's seriously wrong
+         with the encoder. */
+      if (ret != OPUS_OK)
+      {
+         RESTORE_STACK;
+         return OPUS_INTERNAL_ERROR;
+      }
       len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
             data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
       data += len;
       tot_size += len;
    }
    /*printf("\n");*/
    RESTORE_STACK;
    return tot_size;
@@ -1178,17 +1309,17 @@ int opus_multistream_encoder_ctl(OpusMSE
        }
        *value = st->variable_duration;
    }
    break;
    case OPUS_RESET_STATE:
    {
       int s;
       st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0;
-      if (st->surround)
+      if (st->mapping_type == MAPPING_TYPE_SURROUND)
       {
          OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels);
          OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120);
       }
       for (s=0;s<st->layout.nb_streams;s++)
       {
          OpusEncoder *enc;
          enc = (OpusEncoder*)ptr;
--- a/media/libopus/src/repacketizer.c
+++ b/media/libopus/src/repacketizer.c
@@ -244,17 +244,19 @@ int opus_packet_pad(unsigned char *data,
       return OPUS_BAD_ARG;
    if (len==new_len)
       return OPUS_OK;
    else if (len > new_len)
       return OPUS_BAD_ARG;
    opus_repacketizer_init(&rp);
    /* Moving payload to the end of the packet so we can do in-place padding */
    OPUS_MOVE(data+new_len-len, data, len);
-   opus_repacketizer_cat(&rp, data+new_len-len, len);
+   ret = opus_repacketizer_cat(&rp, data+new_len-len, len);
+   if (ret != OPUS_OK)
+      return ret;
    ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
    if (ret > 0)
       return OPUS_OK;
    else
       return ret;
 }
 
 opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len)