author Nicholas Nethercote <nnethercote@mozilla.com>

Mon, 19 Feb 2018 16:55:48 +1100

changeset 756884 2c21e3179b7c7bc83a74629f94fa4aeebae6f534

parent 756883 a5e20bb76cf3ae1cf2c4b48e2e386ef14798f8e6

push id 99575

push user nnethercote@mozilla.com

push date Mon, 19 Feb 2018 05:58:17 +0000

reviewers glandium

bugs 107264

milestone 60.0a1

modules/libpref/Preferences.cpp file | annotate | diff | comparison | revisions

modules/libpref/parser/src/lib.rs file | annotate | diff | comparison | revisions

modules/libpref/test/gtest/Parser.cpp file | annotate | diff | comparison | revisions
--- a/modules/libpref/Preferences.cpp
+++ b/modules/libpref/Preferences.cpp
@@ -1127,39 +1127,39 @@ static void
 TestParseErrorHandlePref(const char* aPrefName,
                          PrefType aType,
                          PrefValueKind aKind,
                          PrefValue aValue,
                          bool aIsSticky)
 {
 }
 
-static char* gTestParseErrorMsg;
+static nsCString gTestParseErrorMsgs;
 
 static void
 TestParseErrorHandleError(const char* aMsg)
 {
-  // aMsg's lifetime is shorter than we need, so duplicate it.
-  gTestParseErrorMsg = moz_xstrdup(aMsg);
+  gTestParseErrorMsgs.Append(aMsg);
+  gTestParseErrorMsgs.Append('\n');
 }
 
 // Keep this in sync with the declaration in test/gtest/Parser.cpp.
 void
 TestParseError(const char* aText, nsCString& aErrorMsg)
 {
   prefs_parser_parse("test",
                      aText,
                      strlen(aText),
                      TestParseErrorHandlePref,
                      TestParseErrorHandleError);
 
-  // Copy the duplicated error message into the outparam, then free it.
-  aErrorMsg.Assign(gTestParseErrorMsg);
-  free(gTestParseErrorMsg);
-  gTestParseErrorMsg = nullptr;
+  // Copy the error messages into the outparam, then clear them from
+  // gTestParseErrorMsgs.
+  aErrorMsg.Assign(gTestParseErrorMsgs);
+  gTestParseErrorMsgs.Truncate();
 }
 
 void
 SendTelemetryLoadData()
 {
   for (auto iter = gTelemetryLoadData->Iter(); !iter.Done(); iter.Next()) {
     const nsCString& filename = PromiseFlatCString(iter.Key());
     const TelemetryLoadData& data = iter.Data();
--- a/modules/libpref/parser/src/lib.rs
+++ b/modules/libpref/parser/src/lib.rs
@@ -36,16 +36,21 @@
 //! - \r\n
 //!
 //! The valid range for <int-value> is -2,147,483,648..2,147,483,647. Values
 //! outside that range will result in a parse error.
 //!
 //! A '\0' char is interpreted as the end of the file. The use of this character
 //! in a prefs file is not recommended. Within string literals \x00 or \u0000
 //! can be used instead.
+//!
+//! The parser performs error recovery. On a syntax error, it will scan forward
+//! to the next ';' token and then continue parsing. If the syntax error occurs
+//! in the middle of a token, it will first finish obtaining the current token
+//! in an appropriate fashion.
 
 // This parser uses several important optimizations.
 //
 // - Because "'\0' means EOF" is part of the grammar (see above) we can match
 //   EOF as a normal char/token, which means we can avoid a typical "do we
 //   still have chars remaining?" test in get_char(), which gives a speedup
 //   because get_char() is a very hot function. (Actually, Rust would
 //   bounds-check this function anyway, so we have get_char_unchecked() which
@@ -112,16 +117,20 @@ type PrefFn = unsafe extern "C" fn(pref_
 /// Keep this in sync with PrefsParserErrorFn in Preferences.cpp.
 type ErrorFn = unsafe extern "C" fn(msg: *const c_char);
 
 /// Parse the contents of a prefs file.
 ///
 /// `buf` is a null-terminated string. `len` is its length, excluding the
 /// null terminator.
 ///
+/// `pref_fn` is called once for each successfully parsed pref.
+///
+/// `error_fn` is called once for each parse error detected.
+///
 /// Keep this in sync with the prefs_parser_parse() declaration in
 /// Preferences.cpp.
 #[no_mangle]
 pub extern "C" fn prefs_parser_parse(path: *const c_char, buf: *const c_char, len: usize,
                                      pref_fn: PrefFn, error_fn: ErrorFn) -> bool {
     let path = unsafe { std::ffi::CStr::from_ptr(path).to_string_lossy().into_owned() };
 
     // Make sure `buf` ends in a '\0', and include that in the length, because
@@ -157,16 +166,23 @@ enum Token {
     // tokens, so these token values are always positive. Furthermore, we
     // tokenize int literals as u32 so that 2147483648 (which doesn't fit into
     // an i32) can be subsequently negated to -2147483648 (which does fit into
     // an i32) if a '-' token precedes it.
     Int(u32),
 
     // Malformed token.
     Error(&'static str),
+
+    // Malformed token at a particular line number. For use when
+    // Parser::line_num might not be the right line number when the error is
+    // reported. E.g. if a multi-line string has a bad escape sequence on the
+    // first line, we don't report the error until the string's end has been
+    // reached.
+    ErrorAtLine(&'static str, u32),
 }
 
 // We categorize every char by what action should be taken when it appears at
 // the start of a new token.
 #[derive(Clone, Copy, PartialEq)]
 enum CharKind {
     // These are ordered by frequency. See the comment in GetToken().
     SingleChar, // Unambiguous single-char tokens: [()+,-]
@@ -267,16 +283,17 @@ const KEYWORD_INFOS: &[KeywordInfo; 5] =
 
 struct Parser<'t> {
     path: &'t str,      // Path to the file being parsed. Used in error messages.
     buf: &'t [u8],      // Text being parsed.
     i: usize,           // Index of next char to be read.
     line_num: u32,      // Current line number within the text.
     pref_fn: PrefFn,    // Callback for processing each pref.
     error_fn: ErrorFn,  // Callback for parse errors.
+    has_errors: bool,   // Have we encountered errors?
 }
 
 // As described above, we use 0 to represent EOF.
 const EOF: u8 = b'\0';
 
 impl<'t> Parser<'t> {
     fn new(path: &'t str, buf: &'t [u8], pref_fn: PrefFn, error_fn: ErrorFn) -> Parser<'t> {
         // Make sure these tables take up 1 byte per entry.
@@ -285,74 +302,69 @@ impl<'t> Parser<'t> {
 
         Parser {
             path: path,
             buf: buf,
             i: 0,
             line_num: 1,
             pref_fn: pref_fn,
             error_fn: error_fn,
+            has_errors: false,
         }
     }
 
     fn parse(&mut self) -> bool {
         // These are reused, because allocating a new Vec for every string is slow.
         let mut name_str  = Vec::with_capacity(128); // For pref names.
         let mut value_str = Vec::with_capacity(512); // For string pref values.
         let mut none_str  = Vec::with_capacity(0);   // For tokens that shouldn't be strings.
 
-        loop {
-            // Note: if you add error recovery here, be aware that the
-            // erroneous char may have been the text-ending EOF, in which case
-            // self.i will point one past the end of the text. You should check
-            // for that possibility before getting more chars.
+        let mut token = self.get_token(&mut none_str);
 
-            // EOF?
-            let token = self.get_token(&mut none_str);
-            if token == Token::SingleChar(EOF) {
-                break;
-            }
-
+        // At the top of the loop we already have a token. In a valid input
+        // this will be either the first token of a new pref, or EOF.
+        loop {
             // <pref-spec>
             let (pref_value_kind, is_sticky) = match token {
-                Token::Pref => {
-                    (PrefValueKind::Default, false)
-                }
-                Token::StickyPref => {
-                    (PrefValueKind::Default, true)
+                Token::Pref => (PrefValueKind::Default, false),
+                Token::StickyPref => (PrefValueKind::Default, true),
+                Token::UserPref => (PrefValueKind::User, false),
+                Token::SingleChar(EOF) => return !self.has_errors,
+                _ => {
+                    token = self.error(token,
+                                       "expected pref specifier at start of pref definition");
+                    continue;
                 }
-                Token::UserPref => {
-                    (PrefValueKind::User, false)
-                }
-                _ => return self.error(token,
-                                       "expected pref specifier at start of pref definition")
             };
 
             // "("
-            let token = self.get_token(&mut none_str);
+            token = self.get_token(&mut none_str);
             if token != Token::SingleChar(b'(') {
-                return self.error(token, "expected '(' after pref specifier");
+                token = self.error(token, "expected '(' after pref specifier");
+                continue;
             }
 
             // <pref-name>
-            let token = self.get_token(&mut name_str);
+            token = self.get_token(&mut name_str);
             let pref_name = if token == Token::String {
                 &name_str
             } else {
-                return self.error(token, "expected pref name after '('");
+                token = self.error(token, "expected pref name after '('");
+                continue;
             };
 
             // ","
-            let token = self.get_token(&mut none_str);
+            token = self.get_token(&mut none_str);
             if token != Token::SingleChar(b',') {
-                return self.error(token, "expected ',' after pref name");
+                token = self.error(token, "expected ',' after pref name");
+                continue;
             }
 
             // <pref-value>
-            let token = self.get_token(&mut value_str);
+            token = self.get_token(&mut value_str);
             let (pref_type, pref_value) = match token {
                 Token::True => {
                     (PrefType::Bool, PrefValue { bool_val: true })
                 }
                 Token::False => {
                     (PrefType::Bool, PrefValue { bool_val: false })
                 }
                 Token::String => {
@@ -360,99 +372,130 @@ impl<'t> Parser<'t> {
                      PrefValue { string_val: value_str.as_ptr() as *const c_char })
 
                 }
                 Token::Int(u) => {
                     // Accept u <= 2147483647; anything larger will overflow i32.
                     if u <= std::i32::MAX as u32 {
                         (PrefType::Int, PrefValue { int_val: u as i32 })
                     } else {
-                        return self.error(Token::Error("integer literal overflowed"), "");
+                        token = self.error(Token::Error("integer literal overflowed"), "");
+                        continue;
                     }
-
                 }
                 Token::SingleChar(b'-') => {
-                    let token = self.get_token(&mut none_str);
+                    token = self.get_token(&mut none_str);
                     if let Token::Int(u) = token {
                         // Accept u <= 2147483648; anything larger will overflow i32 once negated.
                         if u <= std::i32::MAX as u32 {
                             (PrefType::Int, PrefValue { int_val: -(u as i32) })
                         } else if u == std::i32::MAX as u32 + 1 {
                             (PrefType::Int, PrefValue { int_val: std::i32::MIN })
                         } else {
-                            return self.error(Token::Error("integer literal overflowed"), "");
+                            token = self.error(Token::Error("integer literal overflowed"), "");
+                            continue;
                         }
                     } else {
-                        return self.error(token, "expected integer literal after '-'");
+                        token = self.error(token, "expected integer literal after '-'");
+                        continue;
                     }
 
                 }
                 Token::SingleChar(b'+') => {
-                    let token = self.get_token(&mut none_str);
+                    token = self.get_token(&mut none_str);
                     if let Token::Int(u) = token {
                         // Accept u <= 2147483647; anything larger will overflow i32.
                         if u <= std::i32::MAX as u32 {
                             (PrefType::Int, PrefValue { int_val: u as i32 })
                         } else {
-                            return self.error(Token::Error("integer literal overflowed"), "");
+                            token = self.error(Token::Error("integer literal overflowed"), "");
+                            continue;
                         }
                     } else {
-                        return self.error(token, "expected integer literal after '+'");
+                        token = self.error(token, "expected integer literal after '+'");
+                        continue;
                     }
 
                 }
-                _ => return self.error(token, "expected pref value after ','")
+                _ => {
+                    token = self.error(token, "expected pref value after ','");
+                    continue;
+                }
             };
 
             // ")"
-            let token = self.get_token(&mut none_str);
+            token = self.get_token(&mut none_str);
             if token != Token::SingleChar(b')') {
-                return self.error(token, "expected ')' after pref value");
+                token = self.error(token, "expected ')' after pref value");
+                continue;
             }
 
             // ";"
-            let token = self.get_token(&mut none_str);
+            token = self.get_token(&mut none_str);
             if token != Token::SingleChar(b';') {
-                return self.error(token, "expected ';' after ')'");
+                token = self.error(token, "expected ';' after ')'");
+                continue;
             }
 
             unsafe { (self.pref_fn)(pref_name.as_ptr() as *const c_char, pref_type, pref_value_kind,
                                     pref_value, is_sticky) };
+
+            token = self.get_token(&mut none_str);
         }
-
-        true
     }
 
-    fn error(&self, token: Token, msg: &str) -> bool {
-        // If `token` is a Token::Error, it's a lexing error and the error
-        // message is within `token`. Otherwise, it's a parsing error and the
-        // error message is in `msg`.
-        let msg = if let Token::Error(token_msg) = token {
-            token_msg
-        } else {
-            msg
+    fn error(&mut self, token: Token, msg: &str) -> Token {
+        self.has_errors = true;
+
+        // If `token` is a Token::{Error,ErrorAtLine}, it's a lexing error and
+        // the error message is within `token`. Otherwise, it's a parsing error
+        // and the error message is in `msg`.
+        let (msg, line_num) = match token {
+            Token::Error(token_msg) => (token_msg, self.line_num),
+            Token::ErrorAtLine(token_msg, line_num) => (token_msg, line_num),
+            _ => (msg, self.line_num),
         };
-        let msg = format!("{}:{}: prefs parse error: {}", self.path, self.line_num, msg);
+        let msg = format!("{}:{}: prefs parse error: {}", self.path, line_num, msg);
         let msg = std::ffi::CString::new(msg).unwrap();
         unsafe { (self.error_fn)(msg.as_ptr() as *const c_char) };
 
-        false
+        // "Panic-mode" recovery: consume tokens until one of the following
+        // occurs.
+        // - We hit a semicolon, whereupon we return the following token.
+        // - We hit EOF, whereupon we return EOF.
+        //
+        // For this to work, if the lexing functions hit EOF in an error case
+        // they must unget it so we can safely reget it here.
+        //
+        // If the starting token (passed in above) is EOF we must not get
+        // another token otherwise we will read past the end of `self.buf`.
+        let mut dummy_str = Vec::with_capacity(128);
+        let mut token = token;
+        loop {
+            match token {
+                Token::SingleChar(b';') => return self.get_token(&mut dummy_str),
+                Token::SingleChar(EOF) => return token,
+                _ => {}
+            }
+            token = self.get_token(&mut dummy_str);
+        }
     }
 
     #[inline(always)]
     fn get_char(&mut self) -> u8 {
         let c = self.buf[self.i];
         self.i += 1;
         c
     }
 
-    // This function skips the bounds check. Using it at the hottest two call
-    // sites gives a ~15% parsing speed boost.
+    // This function skips the bounds check in non-optimized builds. Using it
+    // at the hottest two call sites gives a ~15% parsing speed boost.
     #[inline(always)]
     unsafe fn get_char_unchecked(&mut self) -> u8 {
+        debug_assert!(self.i < self.buf.len());
         let c = *self.buf.get_unchecked(self.i);
         self.i += 1;
         c
     }
 
     #[inline(always)]
     fn unget_char(&mut self) {
         debug_assert!(self.i > 0);
@@ -486,19 +529,17 @@ impl<'t> Parser<'t> {
                     break;
                 }
                 b'\r' => {
                     self.line_num += 1;
                     self.match_char(b'\n');
                     break;
                 }
                 EOF => {
-                    // We must unget the EOF otherwise we'll read past it the
-                    // next time around the main loop in get_token(), violating
-                    // self.buf's bounds.
+                    // Unget EOF so subsequent calls to get_char() are safe.
                     self.unget_char();
                     break;
                 }
                 _ => continue
             }
         }
     }
 
@@ -515,33 +556,39 @@ impl<'t> Parser<'t> {
                 b'\n' => {
                     self.line_num += 1;
                 }
                 b'\r' => {
                     self.line_num += 1;
                     self.match_char(b'\n');
                 }
                 EOF => {
+                    // Unget EOF so subsequent calls to get_char() are safe.
+                    self.unget_char();
                     return false
                 }
                 _ => continue
             }
         }
     }
 
     fn match_hex_digits(&mut self, ndigits: i32) -> Option<u16> {
         debug_assert!(ndigits == 2 || ndigits == 4);
         let mut value: u16 = 0;
         for _ in 0..ndigits {
             value = value << 4;
             match self.get_char() {
                 c @ b'0'... b'9' => value += (c - b'0') as u16,
                 c @ b'A'...b'F' => value += (c - b'A') as u16 + 10,
                 c @ b'a'...b'f' => value += (c - b'a') as u16 + 10,
-                _ => return None
+                _ => {
+                    // Unget in case the char was a closing quote or EOF.
+                    self.unget_char();
+                    return None;
+                }
             }
         }
         Some(value)
     }
 
     #[inline(always)]
     fn char_kind(c: u8) -> CharKind {
         // Use get_unchecked() because a u8 index cannot exceed this table's
@@ -615,57 +662,65 @@ impl<'t> Parser<'t> {
                                 return Token::Error("unterminated /* comment");
                             }
                         }
                         _ => return Token::Error("expected '/' or '*' after '/'")
                     }
                     continue;
                 }
                 CharKind::Digit => {
-                    let mut value = (c - b'0') as u32;
+                    let mut value = Some((c - b'0') as u32);
                     loop {
                         let c = self.get_char();
                         match Parser::char_kind(c) {
                             CharKind::Digit => {
-                                fn add_digit(v: u32, c: u8) -> Option<u32> {
-                                    v.checked_mul(10)?.checked_add((c - b'0') as u32)
+                                fn add_digit(value: Option<u32>, c: u8) -> Option<u32> {
+                                    value?.checked_mul(10)?.checked_add((c - b'0') as u32)
                                 }
-                                if let Some(v) = add_digit(value, c) {
-                                    value = v;
-                                } else {
-                                    return Token::Error("integer literal overflowed");
-                                }
+                                value = add_digit(value, c);
                             }
                             CharKind::Keyword => {
-                                // Reject things like "123foo".
-                                return Token::Error(
-                                    "unexpected character in integer literal");
+                                // Reject things like "123foo". Error recovery
+                                // will retokenize from "foo" onward.
+                                self.unget_char();
+                                return Token::Error("unexpected character in integer literal");
                             }
                             _ => {
                                 self.unget_char();
                                 break;
                             }
                         }
                     }
-                    return Token::Int(value);
+                    return match value {
+                        Some(v) => Token::Int(v),
+                        None => Token::Error("integer literal overflowed"),
+                    };
                 }
                 CharKind::Hash => {
                     self.match_single_line_comment();
                     continue;
                 }
                 CharKind::CR => {
                     self.match_char(b'\n');
                     self.line_num += 1;
                     continue;
                 }
+                // Error recovery will retokenize from the next character.
                 _ => return Token::Error("unexpected character")
             }
         }
     }
 
+    fn string_error_token(&self, token: &mut Token, msg: &'static str) {
+        // We only want to capture the first tokenization error within a string.
+        if *token == Token::String {
+            *token = Token::ErrorAtLine(msg, self.line_num);
+        }
+    }
+
     // Always inline this because it has a single call site.
     #[inline(always)]
     fn get_string_token(&mut self, quote_char: u8, str_buf: &mut Vec<u8>) -> Token {
         // First scan through the string to see if it contains any chars that
         // need special handling.
         let start = self.i;
         let has_special_chars = loop {
             // To reach here, the previous char must have been a quote
@@ -685,17 +740,22 @@ impl<'t> Parser<'t> {
         if !has_special_chars {
           str_buf.extend(&self.buf[start..self.i - 1]);
           str_buf.push(b'\0');
           return Token::String;
         }
 
         // There were special chars. Re-scan the string, filling in str_buf one
         // char at a time.
+        //
+        // On error, we change `token` to an error token and then keep going to
+        // the end of the string literal. `str_buf` won't be used in that case.
         self.i = start;
+        let mut token = Token::String;
+
         loop {
             let c = self.get_char();
             let c2 = if !Parser::is_special_string_char(c) {
                 c
 
             } else if c == quote_char {
                 break;
 
@@ -707,76 +767,93 @@ impl<'t> Parser<'t> {
                     b'n'  => b'\n',
                     b'r'  => b'\r',
                     b'x'  => {
                         if let Some(value) = self.match_hex_digits(2) {
                             debug_assert!(value <= 0xff);
                             if value != 0 {
                                 value as u8
                             } else {
-                                return Token::Error("\\x00 is not allowed");
+                                self.string_error_token(&mut token, "\\x00 is not allowed");
+                                continue;
                             }
                         } else {
-                            return Token::Error("malformed \\x escape sequence");
+                            self.string_error_token(&mut token, "malformed \\x escape sequence");
+                            continue;
                         }
                     }
                     b'u' => {
                         if let Some(value) = self.match_hex_digits(4) {
                             let mut utf16 = vec![value];
                             if 0xd800 == (0xfc00 & value) {
                                 // High surrogate value. Look for the low surrogate value.
                                 if self.match_char(b'\\') && self.match_char(b'u') {
                                     if let Some(lo) = self.match_hex_digits(4) {
                                         if 0xdc00 == (0xfc00 & lo) {
                                             // Found a valid low surrogate.
                                             utf16.push(lo);
                                         } else {
-                                            return Token::Error(
+                                            self.string_error_token(
+                                                &mut token,
                                                 "invalid low surrogate value after high surrogate");
+                                            continue;
                                         }
                                     }
                                 }
                                 if utf16.len() != 2 {
-                                    return Token::Error(
-                                        "expected low surrogate after high surrogate");
+                                    self.string_error_token(
+                                        &mut token, "expected low surrogate after high surrogate");
+                                    continue;
                                 }
                             } else if value == 0 {
-                                return Token::Error("\\u0000 is not allowed");
+                                self.string_error_token(&mut token, "\\u0000 is not allowed");
+                                continue;
                             }
 
                             // Insert the UTF-16 sequence as UTF-8.
                             let utf8 = String::from_utf16(&utf16).unwrap();
                             str_buf.extend(utf8.as_bytes());
                         } else {
-                            return Token::Error("malformed \\u escape sequence");
+                            self.string_error_token(&mut token, "malformed \\u escape sequence");
+                            continue;
                         }
                         continue; // We don't want to str_buf.push(c2) below.
                     }
-                    _ => return Token::Error("unexpected escape sequence character after '\\'")
+                    _ => {
+                        // Unget in case the char an EOF.
+                        self.unget_char();
+                        self.string_error_token(
+                            &mut token, "unexpected escape sequence character after '\\'");
+                        continue;
+                    }
                 }
 
             } else if c == b'\n' {
                 self.line_num += 1;
                 c
 
             } else if c == b'\r' {
                 self.line_num += 1;
                 if self.match_char(b'\n') {
                     str_buf.push(b'\r');
                     b'\n'
                 } else {
                     c
                 }
 
             } else if c == EOF {
-                return Token::Error("unterminated string literal");
+                // Unget EOF so subsequent calls to get_char() are safe.
+                self.unget_char();
+                self.string_error_token(&mut token, "unterminated string literal");
+                break;
 
             } else {
                 // This case is only hit for the non-closing quote char.
                 debug_assert!((c == b'\'' || c == b'\"') && c != quote_char);
                 c
             };
             str_buf.push(c2);
         }
         str_buf.push(b'\0');
-        return Token::String;
+
+        token
     }
 }
--- a/modules/libpref/test/gtest/Parser.cpp
+++ b/modules/libpref/test/gtest/Parser.cpp
@@ -23,503 +23,414 @@ TEST(PrefsParser, Errors)
   do {                                                                         \
     TestParseError(text_, actualErrorMsg);                                     \
     ASSERT_STREQ(expectedErrorMsg_, actualErrorMsg.get());                     \
   } while (0)
 
   // clang-format off
 
   //-------------------------------------------------------------------------
-  // Valid syntax, just as a sanity test. (More thorough testing of valid syntax
-  // and semantics is done in modules/libpref/test/unit/test_parser.js.)
+  // Valid syntax. (Other testing of more typical valid syntax and semantics is
+  // done in modules/libpref/test/unit/test_parser.js.)
   //-------------------------------------------------------------------------
 
+  // Normal prefs.
   P(R"(
 pref("bool", true);
 sticky_pref("int", 123);
 user_pref("string", "value");
     )",
     ""
   );
 
+  // Totally empty input.
+  P("",
+    ""
+  );
+
+  // Whitespace-only input.
+  P(R"(   
+		
+    )" "\v \t \v \f",
+    ""
+  );
+
   //-------------------------------------------------------------------------
   // All the lexing errors. (To be pedantic, some of the integer literal
   // overflows are triggered in the parser, but put them all here so they're all
   // in the one spot.)
   //-------------------------------------------------------------------------
 
   // Integer overflow errors.
-
   P(R"(
 pref("int.ok", 2147483647);
 pref("int.overflow", 2147483648);
-    )",
-    "test:3: prefs parse error: integer literal overflowed");
-
-  P(R"(
 pref("int.ok", +2147483647);
 pref("int.overflow", +2147483648);
-    )",
-    "test:3: prefs parse error: integer literal overflowed"
-  );
-
-  P(R"(
 pref("int.ok", -2147483648);
 pref("int.overflow", -2147483649);
-    )",
-    "test:3: prefs parse error: integer literal overflowed"
-  );
-
-  P(R"(
 pref("int.overflow", 4294967296);
-    )",
-    "test:2: prefs parse error: integer literal overflowed"
-  );
-
-  P(R"(
 pref("int.overflow", +4294967296);
-    )",
-    "test:2: prefs parse error: integer literal overflowed"
-  );
-
-  P(R"(
 pref("int.overflow", -4294967296);
-    )",
-    "test:2: prefs parse error: integer literal overflowed"
-  );
-
-  P(R"(
 pref("int.overflow", 4294967297);
-    )",
-    "test:2: prefs parse error: integer literal overflowed"
-  );
-
-  P(R"(
 pref("int.overflow", 1234567890987654321);
     )",
-    "test:2: prefs parse error: integer literal overflowed"
+    "test:3: prefs parse error: integer literal overflowed\n"
+    "test:5: prefs parse error: integer literal overflowed\n"
+    "test:7: prefs parse error: integer literal overflowed\n"
+    "test:8: prefs parse error: integer literal overflowed\n"
+    "test:9: prefs parse error: integer literal overflowed\n"
+    "test:10: prefs parse error: integer literal overflowed\n"
+    "test:11: prefs parse error: integer literal overflowed\n"
+    "test:12: prefs parse error: integer literal overflowed\n"
   );
 
   // Other integer errors.
-
   P(R"(
 pref("int.unexpected", 100foo);
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: unexpected character in integer literal"
+    "test:2: prefs parse error: unexpected character in integer literal\n"
   );
 
-  // \x escape errors.
-
   // \x00 is not allowed.
   P(R"(
 pref("string.bad-x-escape", "foo\x00bar");
-    )",
-    "test:2: prefs parse error: \\x00 is not allowed"
-  );
-
-  // End of string after \x.
-  P(R"(
-pref("string.bad-x-escape", "foo\x");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: malformed \\x escape sequence"
-  );
-
-  // Punctuation after \x.
-  P(R"(
-pref("string.bad-x-escape", "foo\x,bar");
-    )",
-    "test:2: prefs parse error: malformed \\x escape sequence"
+    "test:2: prefs parse error: \\x00 is not allowed\n"
   );
 
-  // Space after \x.
+  // Various bad things after \x: end of string, punctuation, space, newline,
+  // EOF.
   P(R"(
+pref("string.bad-x-escape", "foo\x");
+pref("string.bad-x-escape", "foo\x,bar");
 pref("string.bad-x-escape", "foo\x 12");
-    )",
-    "test:2: prefs parse error: malformed \\x escape sequence"
-  );
-
-  // Newline after \x.
-  P(R"(
 pref("string.bad-x-escape", "foo\x
 12");
-    )",
-    "test:2: prefs parse error: malformed \\x escape sequence"
-  );
-
-  // EOF after \x.
-  P(R"(
 pref("string.bad-x-escape", "foo\x)",
-    "test:2: prefs parse error: malformed \\x escape sequence"
+    "test:2: prefs parse error: malformed \\x escape sequence\n"
+    "test:3: prefs parse error: malformed \\x escape sequence\n"
+    "test:4: prefs parse error: malformed \\x escape sequence\n"
+    "test:5: prefs parse error: malformed \\x escape sequence\n"
+    "test:7: prefs parse error: malformed \\x escape sequence\n"
   );
 
   // Not enough hex digits.
   P(R"(
 pref("string.bad-x-escape", "foo\x1");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: malformed \\x escape sequence"
+    "test:2: prefs parse error: malformed \\x escape sequence\n"
   );
 
   // Invalid hex digit.
   P(R"(
 pref("string.bad-x-escape", "foo\x1G");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: malformed \\x escape sequence"
+    "test:2: prefs parse error: malformed \\x escape sequence\n"
   );
 
-  // \u escape errors.
-
   // \u0000 is not allowed.
   // (The string literal is broken in two so that MSVC doesn't complain about
   // an invalid universal-character-name.)
   P(R"(
 pref("string.bad-u-escape", "foo\)" R"(u0000 bar");
-    )",
-    "test:2: prefs parse error: \\u0000 is not allowed"
-  );
-
-  // End of string after \u.
-  P(R"(
-pref("string.bad-u-escape", "foo\u");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
-  );
-
-  // Punctuation after \u.
-  P(R"(
-pref("string.bad-u-escape", "foo\u,bar");
-    )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
+    "test:2: prefs parse error: \\u0000 is not allowed\n"
   );
 
-  // Space after \u.
+  // Various bad things after \u: end of string, punctuation, space, newline,
+  // EOF.
   P(R"(
+pref("string.bad-u-escape", "foo\u");
+pref("string.bad-u-escape", "foo\u,bar");
 pref("string.bad-u-escape", "foo\u 1234");
-    )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
-  );
-
-  // Newline after \u.
-  P(R"(
 pref("string.bad-u-escape", "foo\u
 1234");
-    )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
-  );
-
-  // EOF after \u.
-  P(R"(
 pref("string.bad-u-escape", "foo\u)",
-    "test:2: prefs parse error: malformed \\u escape sequence"
+    "test:2: prefs parse error: malformed \\u escape sequence\n"
+    "test:3: prefs parse error: malformed \\u escape sequence\n"
+    "test:4: prefs parse error: malformed \\u escape sequence\n"
+    "test:5: prefs parse error: malformed \\u escape sequence\n"
+    "test:7: prefs parse error: malformed \\u escape sequence\n"
   );
 
   // Not enough hex digits.
   P(R"(
 pref("string.bad-u-escape", "foo\u1");
-    )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
-  );
-
-  // Not enough hex digits.
-  P(R"(
 pref("string.bad-u-escape", "foo\u12");
+pref("string.bad-u-escape", "foo\u123");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
-  );
-
-  // Not enough hex digits.
-  P(R"(
-pref("string.bad-u-escape", "foo\u123");
-    )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
+    "test:2: prefs parse error: malformed \\u escape sequence\n"
+    "test:3: prefs parse error: malformed \\u escape sequence\n"
+    "test:4: prefs parse error: malformed \\u escape sequence\n"
   );
 
   // Invalid hex digit.
   P(R"(
 pref("string.bad-u-escape", "foo\u1G34");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: malformed \\u escape sequence"
+    "test:2: prefs parse error: malformed \\u escape sequence\n"
   );
 
   // High surrogate not followed by low surrogate.
   // (The string literal is broken in two so that MSVC doesn't complain about
   // an invalid universal-character-name.)
   P(R"(
 pref("string.bad-u-surrogate", "foo\)" R"(ud83c,blah");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: expected low surrogate after high surrogate"
+    "test:2: prefs parse error: expected low surrogate after high surrogate\n"
   );
 
   // High surrogate followed by invalid low surrogate value.
   // (The string literal is broken in two so that MSVC doesn't complain about
   // an invalid universal-character-name.)
   P(R"(
 pref("string.bad-u-surrogate", "foo\)" R"(ud83c\u1234");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: invalid low surrogate value after high surrogate"
-  );
-
-  // Bad escape characters.
-
-  // Unlike in JavaScript, \b isn't allowed.
-  P(R"(
-pref("string.bad-escape", "foo\v");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+    "test:2: prefs parse error: invalid low surrogate value after high surrogate\n"
   );
 
-  // Unlike in JavaScript, \f isn't allowed.
+  // Unlike in JavaScript, \b, \f, \t, \v aren't allowed.
   P(R"(
+pref("string.bad-escape", "foo\b");
 pref("string.bad-escape", "foo\f");
+pref("string.bad-escape", "foo\t");
+pref("string.bad-escape", "foo\v");
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+    "test:2: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:3: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:4: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:5: prefs parse error: unexpected escape sequence character after '\\'\n"
   );
 
-  // Unlike in JavaScript, \t isn't allowed.
-  P(R"(
-pref("string.bad-escape", "foo\t");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // Unlike in JavaScript, \v isn't allowed.
-  P(R"(
-pref("string.bad-escape", "foo\v");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // Non-special letter after \.
+  // Various bad things after \: non-special letter, number, punctuation,
+  // space, newline, EOF.
   P(R"(
 pref("string.bad-escape", "foo\Q");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // Number after \.
-  P(R"(
 pref("string.bad-escape", "foo\1");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // Punctuation after \.
-  P(R"(
 pref("string.bad-escape", "foo\,");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // Space after \.
-  P(R"(
 pref("string.bad-escape", "foo\ n");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // Newline after \.
-  P(R"(
 pref("string.bad-escape", "foo\
 n");
-    )",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
-  );
-
-  // EOF after \.
-  P(R"(
 pref("string.bad-escape", "foo\)",
-    "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+    "test:2: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:3: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:4: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:5: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:6: prefs parse error: unexpected escape sequence character after '\\'\n"
+    "test:8: prefs parse error: unexpected escape sequence character after '\\'\n"
   );
 
   // Unterminated string literals.
 
   // Simple case.
   P(R"(
 pref("string.unterminated-string", "foo
     )",
-    "test:3: prefs parse error: unterminated string literal"
+    "test:3: prefs parse error: unterminated string literal\n"
+  );
+
+  // Alternative case; `int` comes after the string and is seen as a keyword.
+  // The parser then skips to the ';', so no error about the unterminated
+  // string is issued.
+  P(R"(
+pref("string.unterminated-string", "foo);
+pref("int.ok", 0);
+    )",
+    "test:3: prefs parse error: unknown keyword\n"
   );
 
   // Mismatched quotes (1).
   P(R"(
 pref("string.unterminated-string", "foo');
     )",
-    "test:3: prefs parse error: unterminated string literal"
+    "test:3: prefs parse error: unterminated string literal\n"
   );
 
   // Mismatched quotes (2).
   P(R"(
 pref("string.unterminated-string", 'foo");
     )",
-    "test:3: prefs parse error: unterminated string literal"
-  );
-
-  // Unknown keywords
-
-  P(R"(
-foo
-    )",
-    "test:2: prefs parse error: unknown keyword"
-  );
-
-  P(R"(
-preff("string.bad-keyword", true);
-    )",
-    "test:2: prefs parse error: unknown keyword"
+    "test:3: prefs parse error: unterminated string literal\n"
   );
 
+  // Unknown keywords.
   P(R"(
+foo;
+preff("string.bad-keyword", true);
 ticky_pref("string.bad-keyword", true);
+User_pref("string.bad-keyword", true);
+pref("string.bad-keyword", TRUE);
     )",
-    "test:2: prefs parse error: unknown keyword"
+    "test:2: prefs parse error: unknown keyword\n"
+    "test:3: prefs parse error: unknown keyword\n"
+    "test:4: prefs parse error: unknown keyword\n"
+    "test:5: prefs parse error: unknown keyword\n"
+    "test:6: prefs parse error: unknown keyword\n"
   );
 
-  P(R"(
-User_pref("string.bad-keyword", true);
-    )",
-    "test:2: prefs parse error: unknown keyword"
-  );
-
-  P(R"(
-pref("string.bad-keyword", TRUE);
-    )",
-    "test:2: prefs parse error: unknown keyword"
-  );
-
-  // Unterminated C-style comment
+  // Unterminated C-style comment.
   P(R"(
 /* comment
     )",
-    "test:3: prefs parse error: unterminated /* comment"
+    "test:3: prefs parse error: unterminated /* comment\n"
   );
 
-  // Malformed comments.
-
+  // Malformed comment.
   P(R"(
 / comment
     )",
-    "test:2: prefs parse error: expected '/' or '*' after '/'"
+    "test:2: prefs parse error: expected '/' or '*' after '/'\n"
   );
 
-  // Unexpected characters
-
+  // C++-style comment ending in EOF (1).
   P(R"(
-pref("unexpected.chars", &true);
-    )",
-    "test:2: prefs parse error: unexpected character"
+// comment)",
+    ""
   );
 
+  // C++-style comment ending in EOF (2).
   P(R"(
-pref("unexpected.chars" : true);
-    )",
-    "test:2: prefs parse error: unexpected character"
+//)",
+    ""
   );
 
+  // Various unexpected characters.
   P(R"(
+pref("unexpected.chars", &true);
+pref("unexpected.chars" : true);
 @pref("unexpected.chars", true);
-    )",
-    "test:2: prefs parse error: unexpected character"
-  );
-
-  P(R"(
 pref["unexpected.chars": true];
     )",
-    "test:2: prefs parse error: unexpected character"
+    "test:2: prefs parse error: unexpected character\n"
+    "test:3: prefs parse error: unexpected character\n"
+    "test:4: prefs parse error: unexpected character\n"
+    "test:5: prefs parse error: unexpected character\n"
   );
 
   //-------------------------------------------------------------------------
   // All the parsing errors.
   //-------------------------------------------------------------------------
 
   P(R"(
 "pref"("parse.error": true);
-    )",
-    "test:2: prefs parse error: expected pref specifier at start of pref definition"
-  );
-
-  P(R"(
 pref1("parse.error": true);
-    )",
-    "test:2: prefs parse error: expected '(' after pref specifier"
+pref(123: true);
+pref("parse.error" true);
+pref("parse.error", pref);
+pref("parse.error", -true);
+pref("parse.error", +"value");
+pref("parse.error", true;
+pref("parse.error", true)
+pref("int.ok", 1);
+pref("parse.error", true))",
+    "test:2: prefs parse error: expected pref specifier at start of pref definition\n"
+    "test:3: prefs parse error: expected '(' after pref specifier\n"
+    "test:4: prefs parse error: expected pref name after '('\n"
+    "test:5: prefs parse error: expected ',' after pref name\n"
+    "test:6: prefs parse error: expected pref value after ','\n"
+    "test:7: prefs parse error: expected integer literal after '-'\n"
+    "test:8: prefs parse error: expected integer literal after '+'\n"
+    "test:9: prefs parse error: expected ')' after pref value\n"
+    "test:11: prefs parse error: expected ';' after ')'\n"
+    "test:12: prefs parse error: expected ';' after ')'\n"
   );
 
-  P(R"(
-pref(123: true);
-    )",
-    "test:2: prefs parse error: expected pref name after '('"
-  );
+  // Parse errors involving unexpected EOF.
 
   P(R"(
-pref("parse.error" true);
-    )",
-    "test:2: prefs parse error: expected ',' after pref name"
+pref)",
+    "test:2: prefs parse error: expected '(' after pref specifier\n"
   );
 
   P(R"(
-pref("parse.error", -true);
-    )",
-    "test:2: prefs parse error: expected integer literal after '-'"
+pref()",
+    "test:2: prefs parse error: expected pref name after '('\n"
   );
 
   P(R"(
-pref("parse.error", +"value");
-    )",
-    "test:2: prefs parse error: expected integer literal after '+'"
+pref("parse.error")",
+    "test:2: prefs parse error: expected ',' after pref name\n"
+  );
+
+  P(R"(
+pref("parse.error",)",
+    "test:2: prefs parse error: expected pref value after ','\n"
   );
 
   P(R"(
-pref("parse.error", pref);
-    )",
-    "test:2: prefs parse error: expected pref value after ','"
+pref("parse.error", -)",
+    "test:2: prefs parse error: expected integer literal after '-'\n"
   );
 
   P(R"(
-pref("parse.error", true;
-    )",
-    "test:2: prefs parse error: expected ')' after pref value"
+pref("parse.error", +)",
+    "test:2: prefs parse error: expected integer literal after '+'\n"
   );
 
   P(R"(
-pref("parse.error", true)
-pref("parse.error", true)
-    )",
-    "test:3: prefs parse error: expected ';' after ')'"
+pref("parse.error", true)",
+    "test:2: prefs parse error: expected ')' after pref value\n"
+  );
+
+  P(R"(
+pref("parse.error", true))",
+    "test:2: prefs parse error: expected ';' after ')'\n"
   );
 
   // This is something we saw in practice with the old parser, which allowed
   // repeated semicolons.
   P(R"(
 pref("parse.error", true);;
+pref("parse.error", true);;;
+pref("parse.error", true);;;;
+pref("int.ok", 0);
     )",
-    "test:2: prefs parse error: expected pref specifier at start of pref definition"
+    "test:2: prefs parse error: expected pref specifier at start of pref definition\n"
+    "test:3: prefs parse error: expected pref specifier at start of pref definition\n"
+    "test:3: prefs parse error: expected pref specifier at start of pref definition\n"
+    "test:4: prefs parse error: expected pref specifier at start of pref definition\n"
+    "test:4: prefs parse error: expected pref specifier at start of pref definition\n"
+    "test:4: prefs parse error: expected pref specifier at start of pref definition\n"
   );
 
   //-------------------------------------------------------------------------
   // Invalid syntax after various newline combinations, for the purpose of
   // testing that line numbers are correct.
   //-------------------------------------------------------------------------
 
   // In all of the following we have a \n, a \r, a \r\n, and then an error, so
   // the error is on line 4. (Note: these ones don't use raw string literals
   // because MSVC somehow swallows any \r that appears in them.)
 
   P("\n \r \r\n bad",
-    "test:4: prefs parse error: unknown keyword"
+    "test:4: prefs parse error: unknown keyword\n"
   );
 
   P("#\n#\r#\r\n bad",
-    "test:4: prefs parse error: unknown keyword"
+    "test:4: prefs parse error: unknown keyword\n"
   );
 
   P("//\n//\r//\r\n bad",
-    "test:4: prefs parse error: unknown keyword"
+    "test:4: prefs parse error: unknown keyword\n"
   );
 
   P("/*\n \r \r\n*/ bad",
-    "test:4: prefs parse error: unknown keyword"
+    "test:4: prefs parse error: unknown keyword\n"
   );
 
   // Note: the escape sequences do *not* affect the line number.
   P("pref(\"foo\\n\n foo\\r\r foo\\r\\n\r\n foo\", bad);",
-    "test:4: prefs parse error: unknown keyword"
+    "test:4: prefs parse error: unknown keyword\n"
   );
 
   // clang-format on
 }
author	Nicholas Nethercote <nnethercote@mozilla.com>
	Mon, 19 Feb 2018 16:55:48 +1100
changeset 756884	2c21e3179b7c7bc83a74629f94fa4aeebae6f534
parent 756883	a5e20bb76cf3ae1cf2c4b48e2e386ef14798f8e6
push id	99575
push user	nnethercote@mozilla.com
push date	Mon, 19 Feb 2018 05:58:17 +0000
reviewers	glandium
bugs	107264
milestone	60.0a1
modules/libpref/Preferences.cpp		file \| annotate \| diff \| comparison \| revisions
modules/libpref/parser/src/lib.rs		file \| annotate \| diff \| comparison \| revisions
modules/libpref/test/gtest/Parser.cpp		file \| annotate \| diff \| comparison \| revisions