Bug 1374870 - opus: Update to the 1.2 release. r?kinetik
New upstream release with only minor cleanup after 1.2-rc1.
- Speech quality improvements especially in the 12-24 kbit/s range
- Improved VBR encoding for hybrid mode
- More aggressive use of wider speech bandwidth, including fullband speech
starting at 14 kbit/s
- Music quality improvements in the 32-48 kbit/s range
- More optimizations for x86 (SSEx) and ARM Neon
- Support for directly encoding packets up to 120 ms
- DTX support for CELT mode
- SILK CBR improvements
- Support for all of the fixes in draft-ietf-codec-opus-update-04 (the mono
downmix and the folding fixes need --enable-update-draft)
- Many bug fixes, including integer overflows discovered through fuzzing
(no security implications)
MozReview-Commit-ID: CDVdiu3R4qT
--- a/media/libopus/README_MOZILLA
+++ b/media/libopus/README_MOZILLA
@@ -3,9 +3,9 @@ IETF Opus audio codec reference implemen
The source in this directory was copied from an opus
repository checkout by running the ./update.sh script.
Any changes made to this version of the source should
be reflected in that script, e.g. by applying patch
files after the copy step.
The upstream repository is https://git.xiph.org/opus.git
-The git tag/revision used was v1.2-rc1.
+The git tag/revision used was v1.2.
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -15,17 +15,17 @@ EXPORTS.opus += [
]
# We allow warnings for third-party code that can be updated from upstream.
ALLOW_COMPILER_WARNINGS = True
FINAL_LIBRARY = 'gkmedias'
DEFINES['OPUS_BUILD'] = True
-DEFINES['OPUS_VERSION'] = '"v1.2-rc1-mozilla"'
+DEFINES['OPUS_VERSION'] = '"v1.2-mozilla"'
DEFINES['USE_ALLOCA'] = True
# Don't export symbols
DEFINES['OPUS_EXPORT'] = ''
if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['GNU_AS']:
DEFINES['OPUS_ARM_ASM'] = True
DEFINES['OPUS_ARM_EXTERNAL_ASM'] = True
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -52,21 +52,23 @@ celt_sources_arm_asm = [
'celt/arm/celt_pitch_xcorr_arm.s',
]
celt_am_sources_arm_asm = [
'celt/arm/armopts.s.in',
]
celt_sources_arm_neon_intr = [
+ 'celt/arm/celt_neon_intr.c',
+ 'celt/arm/pitch_neon_intr.c',
+]
+
+celt_sources_arm_ne10 = [
'celt/arm/celt_ne10_fft.c',
'celt/arm/celt_ne10_mdct.c',
- 'celt/arm/celt_neon_intr.c',
- 'celt/arm/pitch_neon_intr.c',
- 'CELT_SOURCES_ARM_NE10=',
]
opus_sources = [
'src/opus.c',
'src/opus_decoder.c',
'src/opus_encoder.c',
'src/opus_multistream.c',
'src/opus_multistream_decoder.c',
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -228,16 +228,19 @@ void tonality_analysis_reset(TonalityAna
tonal->speech_confidence = .1f;
}
void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
{
int pos;
int curr_lookahead;
float psum;
+ float tonality_max;
+ float tonality_avg;
+ int tonality_count;
int i;
pos = tonal->read_pos;
curr_lookahead = tonal->write_pos-tonal->read_pos;
if (curr_lookahead<0)
curr_lookahead += DETECT_SIZE;
/* On long frames, look at the second analysis window rather than the first. */
@@ -247,26 +250,31 @@ void tonality_get_info(TonalityAnalysisS
if (pos==DETECT_SIZE)
pos=0;
}
if (pos == tonal->write_pos)
pos--;
if (pos<0)
pos = DETECT_SIZE-1;
OPUS_COPY(info_out, &tonal->info[pos], 1);
+ tonality_max = tonality_avg = info_out->tonality;
+ tonality_count = 1;
/* If possible, look ahead for a tone to compensate for the delay in the tone detector. */
for (i=0;i<3;i++)
{
pos++;
if (pos==DETECT_SIZE)
pos = 0;
if (pos == tonal->write_pos)
break;
- info_out->tonality = MAX32(0, -.03f + MAX32(info_out->tonality, tonal->info[pos].tonality-.05f));
+ tonality_max = MAX32(tonality_max, tonal->info[pos].tonality);
+ tonality_avg += tonal->info[pos].tonality;
+ tonality_count++;
}
+ info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
tonal->read_subframe += len/(tonal->Fs/400);
while (tonal->read_subframe>=8)
{
tonal->read_subframe -= 8;
tonal->read_pos++;
}
if (tonal->read_pos>=DETECT_SIZE)
tonal->read_pos-=DETECT_SIZE;
@@ -357,19 +365,19 @@ static void tonality_analysis(TonalityAn
offset /= 2;
} else if (tonal->Fs == 16000) {
len = 3*len/2;
offset = 3*offset/2;
}
if (tonal->count<4) {
if (tonal->application == OPUS_APPLICATION_VOIP)
- tonal->music_prob = .1;
+ tonal->music_prob = .1f;
else
- tonal->music_prob = .625;
+ tonal->music_prob = .625f;
}
kfft = celt_mode->mdct.kfft[0];
if (tonal->count==0)
tonal->mem_fill = 240;
tonal->hp_ener_accum += (float)downmix_and_resample(downmix, x,
&tonal->inmem[tonal->mem_fill], tonal->downmix_state,
IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C, tonal->Fs);
if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
@@ -690,17 +698,17 @@ static void tonality_analysis(TonalityAn
for (b=0;b<16;b++)
sum += dct_table[i*16+b]*.5f*(tonal->highE[b]+tonal->lowE[b]);
midE[i] = sum;
}
frame_stationarity /= NB_TBANDS;
relativeE /= NB_TBANDS;
if (tonal->count<10)
- relativeE = .5;
+ relativeE = .5f;
frame_noisiness /= NB_TBANDS;
#if 1
info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
#else
info->activity = .5*(1+frame_noisiness-frame_stationarity);
#endif
frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
@@ -820,19 +828,19 @@ static void tonality_analysis(TonalityAn
/* This chunk of code deals with delayed decision. */
psum=1e-20f;
/* Instantaneous probability of speech and music, with beta pre-applied. */
speech0 = (float)pow(1-frame_probs[0], beta);
music0 = (float)pow(frame_probs[0], beta);
if (tonal->count==1)
{
if (tonal->application == OPUS_APPLICATION_VOIP)
- tonal->pmusic[0] = .1;
+ tonal->pmusic[0] = .1f;
else
- tonal->pmusic[0] = .625;
+ tonal->pmusic[0] = .625f;
tonal->pspeech[0] = 1-tonal->pmusic[0];
}
/* Updated probability of having only speech (s0) or only music (m0),
before considering the new observation. */
s0 = tonal->pspeech[0] + tonal->pspeech[1];
m0 = tonal->pmusic [0] + tonal->pmusic [1];
/* Updates s0 and m0 with instantaneous probability. */
tonal->pspeech[0] = s0*(1-tau)*speech0;