--- a/modules/libpref/Preferences.cpp
+++ b/modules/libpref/Preferences.cpp
@@ -1127,39 +1127,39 @@ static void
TestParseErrorHandlePref(const char* aPrefName,
PrefType aType,
PrefValueKind aKind,
PrefValue aValue,
bool aIsSticky)
{
}
-static char* gTestParseErrorMsg;
+static nsCString gTestParseErrorMsgs;
static void
TestParseErrorHandleError(const char* aMsg)
{
- // aMsg's lifetime is shorter than we need, so duplicate it.
- gTestParseErrorMsg = moz_xstrdup(aMsg);
+ gTestParseErrorMsgs.Append(aMsg);
+ gTestParseErrorMsgs.Append('\n');
}
// Keep this in sync with the declaration in test/gtest/Parser.cpp.
void
TestParseError(const char* aText, nsCString& aErrorMsg)
{
prefs_parser_parse("test",
aText,
strlen(aText),
TestParseErrorHandlePref,
TestParseErrorHandleError);
- // Copy the duplicated error message into the outparam, then free it.
- aErrorMsg.Assign(gTestParseErrorMsg);
- free(gTestParseErrorMsg);
- gTestParseErrorMsg = nullptr;
+ // Copy the error messages into the outparam, then clear them from
+ // gTestParseErrorMsgs.
+ aErrorMsg.Assign(gTestParseErrorMsgs);
+ gTestParseErrorMsgs.Truncate();
}
void
SendTelemetryLoadData()
{
for (auto iter = gTelemetryLoadData->Iter(); !iter.Done(); iter.Next()) {
const nsCString& filename = PromiseFlatCString(iter.Key());
const TelemetryLoadData& data = iter.Data();
--- a/modules/libpref/parser/src/lib.rs
+++ b/modules/libpref/parser/src/lib.rs
@@ -36,16 +36,21 @@
//! - \r\n
//!
//! The valid range for <int-value> is -2,147,483,648..2,147,483,647. Values
//! outside that range will result in a parse error.
//!
//! A '\0' char is interpreted as the end of the file. The use of this character
//! in a prefs file is not recommended. Within string literals \x00 or \u0000
//! can be used instead.
+//!
+//! The parser performs error recovery. On a syntax error, it will scan forward
+//! to the next ';' token and then continue parsing. If the syntax error occurs
+//! in the middle of a token, it will first finish obtaining the current token
+//! in an appropriate fashion.
// This parser uses several important optimizations.
//
// - Because "'\0' means EOF" is part of the grammar (see above) we can match
// EOF as a normal char/token, which means we can avoid a typical "do we
// still have chars remaining?" test in get_char(), which gives a speedup
// because get_char() is a very hot function. (Actually, Rust would
// bounds-check this function anyway, so we have get_char_unchecked() which
@@ -112,16 +117,20 @@ type PrefFn = unsafe extern "C" fn(pref_
/// Keep this in sync with PrefsParserErrorFn in Preferences.cpp.
type ErrorFn = unsafe extern "C" fn(msg: *const c_char);
/// Parse the contents of a prefs file.
///
/// `buf` is a null-terminated string. `len` is its length, excluding the
/// null terminator.
///
+/// `pref_fn` is called once for each successfully parsed pref.
+///
+/// `error_fn` is called once for each parse error detected.
+///
/// Keep this in sync with the prefs_parser_parse() declaration in
/// Preferences.cpp.
#[no_mangle]
pub extern "C" fn prefs_parser_parse(path: *const c_char, buf: *const c_char, len: usize,
pref_fn: PrefFn, error_fn: ErrorFn) -> bool {
let path = unsafe { std::ffi::CStr::from_ptr(path).to_string_lossy().into_owned() };
// Make sure `buf` ends in a '\0', and include that in the length, because
@@ -157,16 +166,23 @@ enum Token {
// tokens, so these token values are always positive. Furthermore, we
// tokenize int literals as u32 so that 2147483648 (which doesn't fit into
// an i32) can be subsequently negated to -2147483648 (which does fit into
// an i32) if a '-' token precedes it.
Int(u32),
// Malformed token.
Error(&'static str),
+
+ // Malformed token at a particular line number. For use when
+ // Parser::line_num might not be the right line number when the error is
+ // reported. E.g. if a multi-line string has a bad escape sequence on the
+ // first line, we don't report the error until the string's end has been
+ // reached.
+ ErrorAtLine(&'static str, u32),
}
// We categorize every char by what action should be taken when it appears at
// the start of a new token.
#[derive(Clone, Copy, PartialEq)]
enum CharKind {
// These are ordered by frequency. See the comment in GetToken().
SingleChar, // Unambiguous single-char tokens: [()+,-]
@@ -267,16 +283,17 @@ const KEYWORD_INFOS: &[KeywordInfo; 5] =
struct Parser<'t> {
path: &'t str, // Path to the file being parsed. Used in error messages.
buf: &'t [u8], // Text being parsed.
i: usize, // Index of next char to be read.
line_num: u32, // Current line number within the text.
pref_fn: PrefFn, // Callback for processing each pref.
error_fn: ErrorFn, // Callback for parse errors.
+ has_errors: bool, // Have we encountered errors?
}
// As described above, we use 0 to represent EOF.
const EOF: u8 = b'\0';
impl<'t> Parser<'t> {
fn new(path: &'t str, buf: &'t [u8], pref_fn: PrefFn, error_fn: ErrorFn) -> Parser<'t> {
// Make sure these tables take up 1 byte per entry.
@@ -285,74 +302,69 @@ impl<'t> Parser<'t> {
Parser {
path: path,
buf: buf,
i: 0,
line_num: 1,
pref_fn: pref_fn,
error_fn: error_fn,
+ has_errors: false,
}
}
fn parse(&mut self) -> bool {
// These are reused, because allocating a new Vec for every string is slow.
let mut name_str = Vec::with_capacity(128); // For pref names.
let mut value_str = Vec::with_capacity(512); // For string pref values.
let mut none_str = Vec::with_capacity(0); // For tokens that shouldn't be strings.
- loop {
- // Note: if you add error recovery here, be aware that the
- // erroneous char may have been the text-ending EOF, in which case
- // self.i will point one past the end of the text. You should check
- // for that possibility before getting more chars.
+ let mut token = self.get_token(&mut none_str);
- // EOF?
- let token = self.get_token(&mut none_str);
- if token == Token::SingleChar(EOF) {
- break;
- }
-
+ // At the top of the loop we already have a token. In a valid input
+ // this will be either the first token of a new pref, or EOF.
+ loop {
// <pref-spec>
let (pref_value_kind, is_sticky) = match token {
- Token::Pref => {
- (PrefValueKind::Default, false)
- }
- Token::StickyPref => {
- (PrefValueKind::Default, true)
+ Token::Pref => (PrefValueKind::Default, false),
+ Token::StickyPref => (PrefValueKind::Default, true),
+ Token::UserPref => (PrefValueKind::User, false),
+ Token::SingleChar(EOF) => return !self.has_errors,
+ _ => {
+ token = self.error(token,
+ "expected pref specifier at start of pref definition");
+ continue;
}
- Token::UserPref => {
- (PrefValueKind::User, false)
- }
- _ => return self.error(token,
- "expected pref specifier at start of pref definition")
};
// "("
- let token = self.get_token(&mut none_str);
+ token = self.get_token(&mut none_str);
if token != Token::SingleChar(b'(') {
- return self.error(token, "expected '(' after pref specifier");
+ token = self.error(token, "expected '(' after pref specifier");
+ continue;
}
// <pref-name>
- let token = self.get_token(&mut name_str);
+ token = self.get_token(&mut name_str);
let pref_name = if token == Token::String {
&name_str
} else {
- return self.error(token, "expected pref name after '('");
+ token = self.error(token, "expected pref name after '('");
+ continue;
};
// ","
- let token = self.get_token(&mut none_str);
+ token = self.get_token(&mut none_str);
if token != Token::SingleChar(b',') {
- return self.error(token, "expected ',' after pref name");
+ token = self.error(token, "expected ',' after pref name");
+ continue;
}
// <pref-value>
- let token = self.get_token(&mut value_str);
+ token = self.get_token(&mut value_str);
let (pref_type, pref_value) = match token {
Token::True => {
(PrefType::Bool, PrefValue { bool_val: true })
}
Token::False => {
(PrefType::Bool, PrefValue { bool_val: false })
}
Token::String => {
@@ -360,99 +372,130 @@ impl<'t> Parser<'t> {
PrefValue { string_val: value_str.as_ptr() as *const c_char })
}
Token::Int(u) => {
// Accept u <= 2147483647; anything larger will overflow i32.
if u <= std::i32::MAX as u32 {
(PrefType::Int, PrefValue { int_val: u as i32 })
} else {
- return self.error(Token::Error("integer literal overflowed"), "");
+ token = self.error(Token::Error("integer literal overflowed"), "");
+ continue;
}
-
}
Token::SingleChar(b'-') => {
- let token = self.get_token(&mut none_str);
+ token = self.get_token(&mut none_str);
if let Token::Int(u) = token {
// Accept u <= 2147483648; anything larger will overflow i32 once negated.
if u <= std::i32::MAX as u32 {
(PrefType::Int, PrefValue { int_val: -(u as i32) })
} else if u == std::i32::MAX as u32 + 1 {
(PrefType::Int, PrefValue { int_val: std::i32::MIN })
} else {
- return self.error(Token::Error("integer literal overflowed"), "");
+ token = self.error(Token::Error("integer literal overflowed"), "");
+ continue;
}
} else {
- return self.error(token, "expected integer literal after '-'");
+ token = self.error(token, "expected integer literal after '-'");
+ continue;
}
}
Token::SingleChar(b'+') => {
- let token = self.get_token(&mut none_str);
+ token = self.get_token(&mut none_str);
if let Token::Int(u) = token {
// Accept u <= 2147483647; anything larger will overflow i32.
if u <= std::i32::MAX as u32 {
(PrefType::Int, PrefValue { int_val: u as i32 })
} else {
- return self.error(Token::Error("integer literal overflowed"), "");
+ token = self.error(Token::Error("integer literal overflowed"), "");
+ continue;
}
} else {
- return self.error(token, "expected integer literal after '+'");
+ token = self.error(token, "expected integer literal after '+'");
+ continue;
}
}
- _ => return self.error(token, "expected pref value after ','")
+ _ => {
+ token = self.error(token, "expected pref value after ','");
+ continue;
+ }
};
// ")"
- let token = self.get_token(&mut none_str);
+ token = self.get_token(&mut none_str);
if token != Token::SingleChar(b')') {
- return self.error(token, "expected ')' after pref value");
+ token = self.error(token, "expected ')' after pref value");
+ continue;
}
// ";"
- let token = self.get_token(&mut none_str);
+ token = self.get_token(&mut none_str);
if token != Token::SingleChar(b';') {
- return self.error(token, "expected ';' after ')'");
+ token = self.error(token, "expected ';' after ')'");
+ continue;
}
unsafe { (self.pref_fn)(pref_name.as_ptr() as *const c_char, pref_type, pref_value_kind,
pref_value, is_sticky) };
+
+ token = self.get_token(&mut none_str);
}
-
- true
}
- fn error(&self, token: Token, msg: &str) -> bool {
- // If `token` is a Token::Error, it's a lexing error and the error
- // message is within `token`. Otherwise, it's a parsing error and the
- // error message is in `msg`.
- let msg = if let Token::Error(token_msg) = token {
- token_msg
- } else {
- msg
+ fn error(&mut self, token: Token, msg: &str) -> Token {
+ self.has_errors = true;
+
+ // If `token` is a Token::{Error,ErrorAtLine}, it's a lexing error and
+ // the error message is within `token`. Otherwise, it's a parsing error
+ // and the error message is in `msg`.
+ let (msg, line_num) = match token {
+ Token::Error(token_msg) => (token_msg, self.line_num),
+ Token::ErrorAtLine(token_msg, line_num) => (token_msg, line_num),
+ _ => (msg, self.line_num),
};
- let msg = format!("{}:{}: prefs parse error: {}", self.path, self.line_num, msg);
+ let msg = format!("{}:{}: prefs parse error: {}", self.path, line_num, msg);
let msg = std::ffi::CString::new(msg).unwrap();
unsafe { (self.error_fn)(msg.as_ptr() as *const c_char) };
- false
+ // "Panic-mode" recovery: consume tokens until one of the following
+ // occurs.
+ // - We hit a semicolon, whereupon we return the following token.
+ // - We hit EOF, whereupon we return EOF.
+ //
+ // For this to work, if the lexing functions hit EOF in an error case
+ // they must unget it so we can safely reget it here.
+ //
+ // If the starting token (passed in above) is EOF we must not get
+ // another token otherwise we will read past the end of `self.buf`.
+ let mut dummy_str = Vec::with_capacity(128);
+ let mut token = token;
+ loop {
+ match token {
+ Token::SingleChar(b';') => return self.get_token(&mut dummy_str),
+ Token::SingleChar(EOF) => return token,
+ _ => {}
+ }
+ token = self.get_token(&mut dummy_str);
+ }
}
#[inline(always)]
fn get_char(&mut self) -> u8 {
let c = self.buf[self.i];
self.i += 1;
c
}
- // This function skips the bounds check. Using it at the hottest two call
- // sites gives a ~15% parsing speed boost.
+ // This function skips the bounds check in non-optimized builds. Using it
+ // at the hottest two call sites gives a ~15% parsing speed boost.
#[inline(always)]
unsafe fn get_char_unchecked(&mut self) -> u8 {
+ debug_assert!(self.i < self.buf.len());
let c = *self.buf.get_unchecked(self.i);
self.i += 1;
c
}
#[inline(always)]
fn unget_char(&mut self) {
debug_assert!(self.i > 0);
@@ -486,19 +529,17 @@ impl<'t> Parser<'t> {
break;
}
b'\r' => {
self.line_num += 1;
self.match_char(b'\n');
break;
}
EOF => {
- // We must unget the EOF otherwise we'll read past it the
- // next time around the main loop in get_token(), violating
- // self.buf's bounds.
+ // Unget EOF so subsequent calls to get_char() are safe.
self.unget_char();
break;
}
_ => continue
}
}
}
@@ -515,33 +556,39 @@ impl<'t> Parser<'t> {
b'\n' => {
self.line_num += 1;
}
b'\r' => {
self.line_num += 1;
self.match_char(b'\n');
}
EOF => {
+ // Unget EOF so subsequent calls to get_char() are safe.
+ self.unget_char();
return false
}
_ => continue
}
}
}
fn match_hex_digits(&mut self, ndigits: i32) -> Option<u16> {
debug_assert!(ndigits == 2 || ndigits == 4);
let mut value: u16 = 0;
for _ in 0..ndigits {
value = value << 4;
match self.get_char() {
c @ b'0'... b'9' => value += (c - b'0') as u16,
c @ b'A'...b'F' => value += (c - b'A') as u16 + 10,
c @ b'a'...b'f' => value += (c - b'a') as u16 + 10,
- _ => return None
+ _ => {
+ // Unget in case the char was a closing quote or EOF.
+ self.unget_char();
+ return None;
+ }
}
}
Some(value)
}
#[inline(always)]
fn char_kind(c: u8) -> CharKind {
// Use get_unchecked() because a u8 index cannot exceed this table's
@@ -615,57 +662,65 @@ impl<'t> Parser<'t> {
return Token::Error("unterminated /* comment");
}
}
_ => return Token::Error("expected '/' or '*' after '/'")
}
continue;
}
CharKind::Digit => {
- let mut value = (c - b'0') as u32;
+ let mut value = Some((c - b'0') as u32);
loop {
let c = self.get_char();
match Parser::char_kind(c) {
CharKind::Digit => {
- fn add_digit(v: u32, c: u8) -> Option<u32> {
- v.checked_mul(10)?.checked_add((c - b'0') as u32)
+ fn add_digit(value: Option<u32>, c: u8) -> Option<u32> {
+ value?.checked_mul(10)?.checked_add((c - b'0') as u32)
}
- if let Some(v) = add_digit(value, c) {
- value = v;
- } else {
- return Token::Error("integer literal overflowed");
- }
+ value = add_digit(value, c);
}
CharKind::Keyword => {
- // Reject things like "123foo".
- return Token::Error(
- "unexpected character in integer literal");
+ // Reject things like "123foo". Error recovery
+ // will retokenize from "foo" onward.
+ self.unget_char();
+ return Token::Error("unexpected character in integer literal");
}
_ => {
self.unget_char();
break;
}
}
}
- return Token::Int(value);
+ return match value {
+ Some(v) => Token::Int(v),
+ None => Token::Error("integer literal overflowed"),
+ };
}
CharKind::Hash => {
self.match_single_line_comment();
continue;
}
CharKind::CR => {
self.match_char(b'\n');
self.line_num += 1;
continue;
}
+ // Error recovery will retokenize from the next character.
_ => return Token::Error("unexpected character")
}
}
}
+ fn string_error_token(&self, token: &mut Token, msg: &'static str) {
+ // We only want to capture the first tokenization error within a string.
+ if *token == Token::String {
+ *token = Token::ErrorAtLine(msg, self.line_num);
+ }
+ }
+
// Always inline this because it has a single call site.
#[inline(always)]
fn get_string_token(&mut self, quote_char: u8, str_buf: &mut Vec<u8>) -> Token {
// First scan through the string to see if it contains any chars that
// need special handling.
let start = self.i;
let has_special_chars = loop {
// To reach here, the previous char must have been a quote
@@ -685,17 +740,22 @@ impl<'t> Parser<'t> {
if !has_special_chars {
str_buf.extend(&self.buf[start..self.i - 1]);
str_buf.push(b'\0');
return Token::String;
}
// There were special chars. Re-scan the string, filling in str_buf one
// char at a time.
+ //
+ // On error, we change `token` to an error token and then keep going to
+ // the end of the string literal. `str_buf` won't be used in that case.
self.i = start;
+ let mut token = Token::String;
+
loop {
let c = self.get_char();
let c2 = if !Parser::is_special_string_char(c) {
c
} else if c == quote_char {
break;
@@ -707,76 +767,93 @@ impl<'t> Parser<'t> {
b'n' => b'\n',
b'r' => b'\r',
b'x' => {
if let Some(value) = self.match_hex_digits(2) {
debug_assert!(value <= 0xff);
if value != 0 {
value as u8
} else {
- return Token::Error("\\x00 is not allowed");
+ self.string_error_token(&mut token, "\\x00 is not allowed");
+ continue;
}
} else {
- return Token::Error("malformed \\x escape sequence");
+ self.string_error_token(&mut token, "malformed \\x escape sequence");
+ continue;
}
}
b'u' => {
if let Some(value) = self.match_hex_digits(4) {
let mut utf16 = vec![value];
if 0xd800 == (0xfc00 & value) {
// High surrogate value. Look for the low surrogate value.
if self.match_char(b'\\') && self.match_char(b'u') {
if let Some(lo) = self.match_hex_digits(4) {
if 0xdc00 == (0xfc00 & lo) {
// Found a valid low surrogate.
utf16.push(lo);
} else {
- return Token::Error(
+ self.string_error_token(
+ &mut token,
"invalid low surrogate value after high surrogate");
+ continue;
}
}
}
if utf16.len() != 2 {
- return Token::Error(
- "expected low surrogate after high surrogate");
+ self.string_error_token(
+ &mut token, "expected low surrogate after high surrogate");
+ continue;
}
} else if value == 0 {
- return Token::Error("\\u0000 is not allowed");
+ self.string_error_token(&mut token, "\\u0000 is not allowed");
+ continue;
}
// Insert the UTF-16 sequence as UTF-8.
let utf8 = String::from_utf16(&utf16).unwrap();
str_buf.extend(utf8.as_bytes());
} else {
- return Token::Error("malformed \\u escape sequence");
+ self.string_error_token(&mut token, "malformed \\u escape sequence");
+ continue;
}
continue; // We don't want to str_buf.push(c2) below.
}
- _ => return Token::Error("unexpected escape sequence character after '\\'")
+ _ => {
+ // Unget in case the char an EOF.
+ self.unget_char();
+ self.string_error_token(
+ &mut token, "unexpected escape sequence character after '\\'");
+ continue;
+ }
}
} else if c == b'\n' {
self.line_num += 1;
c
} else if c == b'\r' {
self.line_num += 1;
if self.match_char(b'\n') {
str_buf.push(b'\r');
b'\n'
} else {
c
}
} else if c == EOF {
- return Token::Error("unterminated string literal");
+ // Unget EOF so subsequent calls to get_char() are safe.
+ self.unget_char();
+ self.string_error_token(&mut token, "unterminated string literal");
+ break;
} else {
// This case is only hit for the non-closing quote char.
debug_assert!((c == b'\'' || c == b'\"') && c != quote_char);
c
};
str_buf.push(c2);
}
str_buf.push(b'\0');
- return Token::String;
+
+ token
}
}
--- a/modules/libpref/test/gtest/Parser.cpp
+++ b/modules/libpref/test/gtest/Parser.cpp
@@ -23,503 +23,414 @@ TEST(PrefsParser, Errors)
do { \
TestParseError(text_, actualErrorMsg); \
ASSERT_STREQ(expectedErrorMsg_, actualErrorMsg.get()); \
} while (0)
// clang-format off
//-------------------------------------------------------------------------
- // Valid syntax, just as a sanity test. (More thorough testing of valid syntax
- // and semantics is done in modules/libpref/test/unit/test_parser.js.)
+ // Valid syntax. (Other testing of more typical valid syntax and semantics is
+ // done in modules/libpref/test/unit/test_parser.js.)
//-------------------------------------------------------------------------
+ // Normal prefs.
P(R"(
pref("bool", true);
sticky_pref("int", 123);
user_pref("string", "value");
)",
""
);
+ // Totally empty input.
+ P("",
+ ""
+ );
+
+ // Whitespace-only input.
+ P(R"(
+
+ )" "\v \t \v \f",
+ ""
+ );
+
//-------------------------------------------------------------------------
// All the lexing errors. (To be pedantic, some of the integer literal
// overflows are triggered in the parser, but put them all here so they're all
// in the one spot.)
//-------------------------------------------------------------------------
// Integer overflow errors.
-
P(R"(
pref("int.ok", 2147483647);
pref("int.overflow", 2147483648);
- )",
- "test:3: prefs parse error: integer literal overflowed");
-
- P(R"(
pref("int.ok", +2147483647);
pref("int.overflow", +2147483648);
- )",
- "test:3: prefs parse error: integer literal overflowed"
- );
-
- P(R"(
pref("int.ok", -2147483648);
pref("int.overflow", -2147483649);
- )",
- "test:3: prefs parse error: integer literal overflowed"
- );
-
- P(R"(
pref("int.overflow", 4294967296);
- )",
- "test:2: prefs parse error: integer literal overflowed"
- );
-
- P(R"(
pref("int.overflow", +4294967296);
- )",
- "test:2: prefs parse error: integer literal overflowed"
- );
-
- P(R"(
pref("int.overflow", -4294967296);
- )",
- "test:2: prefs parse error: integer literal overflowed"
- );
-
- P(R"(
pref("int.overflow", 4294967297);
- )",
- "test:2: prefs parse error: integer literal overflowed"
- );
-
- P(R"(
pref("int.overflow", 1234567890987654321);
)",
- "test:2: prefs parse error: integer literal overflowed"
+ "test:3: prefs parse error: integer literal overflowed\n"
+ "test:5: prefs parse error: integer literal overflowed\n"
+ "test:7: prefs parse error: integer literal overflowed\n"
+ "test:8: prefs parse error: integer literal overflowed\n"
+ "test:9: prefs parse error: integer literal overflowed\n"
+ "test:10: prefs parse error: integer literal overflowed\n"
+ "test:11: prefs parse error: integer literal overflowed\n"
+ "test:12: prefs parse error: integer literal overflowed\n"
);
// Other integer errors.
-
P(R"(
pref("int.unexpected", 100foo);
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: unexpected character in integer literal"
+ "test:2: prefs parse error: unexpected character in integer literal\n"
);
- // \x escape errors.
-
// \x00 is not allowed.
P(R"(
pref("string.bad-x-escape", "foo\x00bar");
- )",
- "test:2: prefs parse error: \\x00 is not allowed"
- );
-
- // End of string after \x.
- P(R"(
-pref("string.bad-x-escape", "foo\x");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: malformed \\x escape sequence"
- );
-
- // Punctuation after \x.
- P(R"(
-pref("string.bad-x-escape", "foo\x,bar");
- )",
- "test:2: prefs parse error: malformed \\x escape sequence"
+ "test:2: prefs parse error: \\x00 is not allowed\n"
);
- // Space after \x.
+ // Various bad things after \x: end of string, punctuation, space, newline,
+ // EOF.
P(R"(
+pref("string.bad-x-escape", "foo\x");
+pref("string.bad-x-escape", "foo\x,bar");
pref("string.bad-x-escape", "foo\x 12");
- )",
- "test:2: prefs parse error: malformed \\x escape sequence"
- );
-
- // Newline after \x.
- P(R"(
pref("string.bad-x-escape", "foo\x
12");
- )",
- "test:2: prefs parse error: malformed \\x escape sequence"
- );
-
- // EOF after \x.
- P(R"(
pref("string.bad-x-escape", "foo\x)",
- "test:2: prefs parse error: malformed \\x escape sequence"
+ "test:2: prefs parse error: malformed \\x escape sequence\n"
+ "test:3: prefs parse error: malformed \\x escape sequence\n"
+ "test:4: prefs parse error: malformed \\x escape sequence\n"
+ "test:5: prefs parse error: malformed \\x escape sequence\n"
+ "test:7: prefs parse error: malformed \\x escape sequence\n"
);
// Not enough hex digits.
P(R"(
pref("string.bad-x-escape", "foo\x1");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: malformed \\x escape sequence"
+ "test:2: prefs parse error: malformed \\x escape sequence\n"
);
// Invalid hex digit.
P(R"(
pref("string.bad-x-escape", "foo\x1G");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: malformed \\x escape sequence"
+ "test:2: prefs parse error: malformed \\x escape sequence\n"
);
- // \u escape errors.
-
// \u0000 is not allowed.
// (The string literal is broken in two so that MSVC doesn't complain about
// an invalid universal-character-name.)
P(R"(
pref("string.bad-u-escape", "foo\)" R"(u0000 bar");
- )",
- "test:2: prefs parse error: \\u0000 is not allowed"
- );
-
- // End of string after \u.
- P(R"(
-pref("string.bad-u-escape", "foo\u");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: malformed \\u escape sequence"
- );
-
- // Punctuation after \u.
- P(R"(
-pref("string.bad-u-escape", "foo\u,bar");
- )",
- "test:2: prefs parse error: malformed \\u escape sequence"
+ "test:2: prefs parse error: \\u0000 is not allowed\n"
);
- // Space after \u.
+ // Various bad things after \u: end of string, punctuation, space, newline,
+ // EOF.
P(R"(
+pref("string.bad-u-escape", "foo\u");
+pref("string.bad-u-escape", "foo\u,bar");
pref("string.bad-u-escape", "foo\u 1234");
- )",
- "test:2: prefs parse error: malformed \\u escape sequence"
- );
-
- // Newline after \u.
- P(R"(
pref("string.bad-u-escape", "foo\u
1234");
- )",
- "test:2: prefs parse error: malformed \\u escape sequence"
- );
-
- // EOF after \u.
- P(R"(
pref("string.bad-u-escape", "foo\u)",
- "test:2: prefs parse error: malformed \\u escape sequence"
+ "test:2: prefs parse error: malformed \\u escape sequence\n"
+ "test:3: prefs parse error: malformed \\u escape sequence\n"
+ "test:4: prefs parse error: malformed \\u escape sequence\n"
+ "test:5: prefs parse error: malformed \\u escape sequence\n"
+ "test:7: prefs parse error: malformed \\u escape sequence\n"
);
// Not enough hex digits.
P(R"(
pref("string.bad-u-escape", "foo\u1");
- )",
- "test:2: prefs parse error: malformed \\u escape sequence"
- );
-
- // Not enough hex digits.
- P(R"(
pref("string.bad-u-escape", "foo\u12");
+pref("string.bad-u-escape", "foo\u123");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: malformed \\u escape sequence"
- );
-
- // Not enough hex digits.
- P(R"(
-pref("string.bad-u-escape", "foo\u123");
- )",
- "test:2: prefs parse error: malformed \\u escape sequence"
+ "test:2: prefs parse error: malformed \\u escape sequence\n"
+ "test:3: prefs parse error: malformed \\u escape sequence\n"
+ "test:4: prefs parse error: malformed \\u escape sequence\n"
);
// Invalid hex digit.
P(R"(
pref("string.bad-u-escape", "foo\u1G34");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: malformed \\u escape sequence"
+ "test:2: prefs parse error: malformed \\u escape sequence\n"
);
// High surrogate not followed by low surrogate.
// (The string literal is broken in two so that MSVC doesn't complain about
// an invalid universal-character-name.)
P(R"(
pref("string.bad-u-surrogate", "foo\)" R"(ud83c,blah");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: expected low surrogate after high surrogate"
+ "test:2: prefs parse error: expected low surrogate after high surrogate\n"
);
// High surrogate followed by invalid low surrogate value.
// (The string literal is broken in two so that MSVC doesn't complain about
// an invalid universal-character-name.)
P(R"(
pref("string.bad-u-surrogate", "foo\)" R"(ud83c\u1234");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: invalid low surrogate value after high surrogate"
- );
-
- // Bad escape characters.
-
- // Unlike in JavaScript, \b isn't allowed.
- P(R"(
-pref("string.bad-escape", "foo\v");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ "test:2: prefs parse error: invalid low surrogate value after high surrogate\n"
);
- // Unlike in JavaScript, \f isn't allowed.
+ // Unlike in JavaScript, \b, \f, \t, \v aren't allowed.
P(R"(
+pref("string.bad-escape", "foo\b");
pref("string.bad-escape", "foo\f");
+pref("string.bad-escape", "foo\t");
+pref("string.bad-escape", "foo\v");
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:3: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:4: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:5: prefs parse error: unexpected escape sequence character after '\\'\n"
);
- // Unlike in JavaScript, \t isn't allowed.
- P(R"(
-pref("string.bad-escape", "foo\t");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // Unlike in JavaScript, \v isn't allowed.
- P(R"(
-pref("string.bad-escape", "foo\v");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // Non-special letter after \.
+ // Various bad things after \: non-special letter, number, punctuation,
+ // space, newline, EOF.
P(R"(
pref("string.bad-escape", "foo\Q");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // Number after \.
- P(R"(
pref("string.bad-escape", "foo\1");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // Punctuation after \.
- P(R"(
pref("string.bad-escape", "foo\,");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // Space after \.
- P(R"(
pref("string.bad-escape", "foo\ n");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // Newline after \.
- P(R"(
pref("string.bad-escape", "foo\
n");
- )",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
- );
-
- // EOF after \.
- P(R"(
pref("string.bad-escape", "foo\)",
- "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:3: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:4: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:5: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:6: prefs parse error: unexpected escape sequence character after '\\'\n"
+ "test:8: prefs parse error: unexpected escape sequence character after '\\'\n"
);
// Unterminated string literals.
// Simple case.
P(R"(
pref("string.unterminated-string", "foo
)",
- "test:3: prefs parse error: unterminated string literal"
+ "test:3: prefs parse error: unterminated string literal\n"
+ );
+
+ // Alternative case; `int` comes after the string and is seen as a keyword.
+ // The parser then skips to the ';', so no error about the unterminated
+ // string is issued.
+ P(R"(
+pref("string.unterminated-string", "foo);
+pref("int.ok", 0);
+ )",
+ "test:3: prefs parse error: unknown keyword\n"
);
// Mismatched quotes (1).
P(R"(
pref("string.unterminated-string", "foo');
)",
- "test:3: prefs parse error: unterminated string literal"
+ "test:3: prefs parse error: unterminated string literal\n"
);
// Mismatched quotes (2).
P(R"(
pref("string.unterminated-string", 'foo");
)",
- "test:3: prefs parse error: unterminated string literal"
- );
-
- // Unknown keywords
-
- P(R"(
-foo
- )",
- "test:2: prefs parse error: unknown keyword"
- );
-
- P(R"(
-preff("string.bad-keyword", true);
- )",
- "test:2: prefs parse error: unknown keyword"
+ "test:3: prefs parse error: unterminated string literal\n"
);
+ // Unknown keywords.
P(R"(
+foo;
+preff("string.bad-keyword", true);
ticky_pref("string.bad-keyword", true);
+User_pref("string.bad-keyword", true);
+pref("string.bad-keyword", TRUE);
)",
- "test:2: prefs parse error: unknown keyword"
+ "test:2: prefs parse error: unknown keyword\n"
+ "test:3: prefs parse error: unknown keyword\n"
+ "test:4: prefs parse error: unknown keyword\n"
+ "test:5: prefs parse error: unknown keyword\n"
+ "test:6: prefs parse error: unknown keyword\n"
);
- P(R"(
-User_pref("string.bad-keyword", true);
- )",
- "test:2: prefs parse error: unknown keyword"
- );
-
- P(R"(
-pref("string.bad-keyword", TRUE);
- )",
- "test:2: prefs parse error: unknown keyword"
- );
-
- // Unterminated C-style comment
+ // Unterminated C-style comment.
P(R"(
/* comment
)",
- "test:3: prefs parse error: unterminated /* comment"
+ "test:3: prefs parse error: unterminated /* comment\n"
);
- // Malformed comments.
-
+ // Malformed comment.
P(R"(
/ comment
)",
- "test:2: prefs parse error: expected '/' or '*' after '/'"
+ "test:2: prefs parse error: expected '/' or '*' after '/'\n"
);
- // Unexpected characters
-
+ // C++-style comment ending in EOF (1).
P(R"(
-pref("unexpected.chars", &true);
- )",
- "test:2: prefs parse error: unexpected character"
+// comment)",
+ ""
);
+ // C++-style comment ending in EOF (2).
P(R"(
-pref("unexpected.chars" : true);
- )",
- "test:2: prefs parse error: unexpected character"
+//)",
+ ""
);
+ // Various unexpected characters.
P(R"(
+pref("unexpected.chars", &true);
+pref("unexpected.chars" : true);
@pref("unexpected.chars", true);
- )",
- "test:2: prefs parse error: unexpected character"
- );
-
- P(R"(
pref["unexpected.chars": true];
)",
- "test:2: prefs parse error: unexpected character"
+ "test:2: prefs parse error: unexpected character\n"
+ "test:3: prefs parse error: unexpected character\n"
+ "test:4: prefs parse error: unexpected character\n"
+ "test:5: prefs parse error: unexpected character\n"
);
//-------------------------------------------------------------------------
// All the parsing errors.
//-------------------------------------------------------------------------
P(R"(
"pref"("parse.error": true);
- )",
- "test:2: prefs parse error: expected pref specifier at start of pref definition"
- );
-
- P(R"(
pref1("parse.error": true);
- )",
- "test:2: prefs parse error: expected '(' after pref specifier"
+pref(123: true);
+pref("parse.error" true);
+pref("parse.error", pref);
+pref("parse.error", -true);
+pref("parse.error", +"value");
+pref("parse.error", true;
+pref("parse.error", true)
+pref("int.ok", 1);
+pref("parse.error", true))",
+ "test:2: prefs parse error: expected pref specifier at start of pref definition\n"
+ "test:3: prefs parse error: expected '(' after pref specifier\n"
+ "test:4: prefs parse error: expected pref name after '('\n"
+ "test:5: prefs parse error: expected ',' after pref name\n"
+ "test:6: prefs parse error: expected pref value after ','\n"
+ "test:7: prefs parse error: expected integer literal after '-'\n"
+ "test:8: prefs parse error: expected integer literal after '+'\n"
+ "test:9: prefs parse error: expected ')' after pref value\n"
+ "test:11: prefs parse error: expected ';' after ')'\n"
+ "test:12: prefs parse error: expected ';' after ')'\n"
);
- P(R"(
-pref(123: true);
- )",
- "test:2: prefs parse error: expected pref name after '('"
- );
+ // Parse errors involving unexpected EOF.
P(R"(
-pref("parse.error" true);
- )",
- "test:2: prefs parse error: expected ',' after pref name"
+pref)",
+ "test:2: prefs parse error: expected '(' after pref specifier\n"
);
P(R"(
-pref("parse.error", -true);
- )",
- "test:2: prefs parse error: expected integer literal after '-'"
+pref()",
+ "test:2: prefs parse error: expected pref name after '('\n"
);
P(R"(
-pref("parse.error", +"value");
- )",
- "test:2: prefs parse error: expected integer literal after '+'"
+pref("parse.error")",
+ "test:2: prefs parse error: expected ',' after pref name\n"
+ );
+
+ P(R"(
+pref("parse.error",)",
+ "test:2: prefs parse error: expected pref value after ','\n"
);
P(R"(
-pref("parse.error", pref);
- )",
- "test:2: prefs parse error: expected pref value after ','"
+pref("parse.error", -)",
+ "test:2: prefs parse error: expected integer literal after '-'\n"
);
P(R"(
-pref("parse.error", true;
- )",
- "test:2: prefs parse error: expected ')' after pref value"
+pref("parse.error", +)",
+ "test:2: prefs parse error: expected integer literal after '+'\n"
);
P(R"(
-pref("parse.error", true)
-pref("parse.error", true)
- )",
- "test:3: prefs parse error: expected ';' after ')'"
+pref("parse.error", true)",
+ "test:2: prefs parse error: expected ')' after pref value\n"
+ );
+
+ P(R"(
+pref("parse.error", true))",
+ "test:2: prefs parse error: expected ';' after ')'\n"
);
// This is something we saw in practice with the old parser, which allowed
// repeated semicolons.
P(R"(
pref("parse.error", true);;
+pref("parse.error", true);;;
+pref("parse.error", true);;;;
+pref("int.ok", 0);
)",
- "test:2: prefs parse error: expected pref specifier at start of pref definition"
+ "test:2: prefs parse error: expected pref specifier at start of pref definition\n"
+ "test:3: prefs parse error: expected pref specifier at start of pref definition\n"
+ "test:3: prefs parse error: expected pref specifier at start of pref definition\n"
+ "test:4: prefs parse error: expected pref specifier at start of pref definition\n"
+ "test:4: prefs parse error: expected pref specifier at start of pref definition\n"
+ "test:4: prefs parse error: expected pref specifier at start of pref definition\n"
);
//-------------------------------------------------------------------------
// Invalid syntax after various newline combinations, for the purpose of
// testing that line numbers are correct.
//-------------------------------------------------------------------------
// In all of the following we have a \n, a \r, a \r\n, and then an error, so
// the error is on line 4. (Note: these ones don't use raw string literals
// because MSVC somehow swallows any \r that appears in them.)
P("\n \r \r\n bad",
- "test:4: prefs parse error: unknown keyword"
+ "test:4: prefs parse error: unknown keyword\n"
);
P("#\n#\r#\r\n bad",
- "test:4: prefs parse error: unknown keyword"
+ "test:4: prefs parse error: unknown keyword\n"
);
P("//\n//\r//\r\n bad",
- "test:4: prefs parse error: unknown keyword"
+ "test:4: prefs parse error: unknown keyword\n"
);
P("/*\n \r \r\n*/ bad",
- "test:4: prefs parse error: unknown keyword"
+ "test:4: prefs parse error: unknown keyword\n"
);
// Note: the escape sequences do *not* affect the line number.
P("pref(\"foo\\n\n foo\\r\r foo\\r\\n\r\n foo\", bad);",
- "test:4: prefs parse error: unknown keyword"
+ "test:4: prefs parse error: unknown keyword\n"
);
// clang-format on
}