--- a/modules/libpref/Preferences.cpp
+++ b/modules/libpref/Preferences.cpp
@@ -116,24 +116,26 @@ using namespace mozilla;
typedef nsTArray<nsCString> PrefSaveData;
// 1 MB should be enough for everyone.
static const uint32_t MAX_PREF_LENGTH = 1 * 1024 * 1024;
// Actually, 4kb should be enough for everyone.
static const uint32_t MAX_ADVISABLE_PREF_LENGTH = 4 * 1024;
+// Keep this in sync with PrefType in parser/src/lib.rs.
enum class PrefType : uint8_t
{
None = 0, // only used when neither the default nor user value is set
String = 1,
Int = 2,
Bool = 3,
};
+// Keep this in sync with PrefValue in parser/src/lib.rs.
union PrefValue {
const char* mStringVal;
int32_t mIntVal;
bool mBoolVal;
bool Equals(PrefType aType, PrefValue aValue)
{
switch (aType) {
@@ -786,18 +788,18 @@ public:
static Pref*
pref_HashTableLookup(const char* aPrefName)
{
MOZ_ASSERT(NS_IsMainThread() || mozilla::ServoStyleSet::IsInServoTraversal());
#ifdef DEBUG
if (!XRE_IsParentProcess()) {
if (gPhase == ContentProcessPhase::eNoPrefsSet) {
- MOZ_CRASH_UNSAFE_PRINTF(
- "accessing pref %s before early prefs are set", aPrefName);
+ MOZ_CRASH_UNSAFE_PRINTF("accessing pref %s before early prefs are set",
+ aPrefName);
}
if (gPhase == ContentProcessPhase::eEarlyPrefsSet && !gInstallingCallback &&
!IsEarlyPref(aPrefName)) {
// If you hit this crash, you have an early access of a non-early pref.
// Consider moving the access later or add the pref to the whitelist of
// early prefs in ContentPrefs.cpp and get review from a DOM peer.
//
@@ -934,626 +936,111 @@ NotifyCallbacks(const char* aPrefName)
gShouldCleanupDeadNodes = false;
}
}
//===========================================================================
// Prefs parsing
//===========================================================================
+extern "C" {
+
+// Keep this in sync with PrefFn in parser/src/lib.rs.
+typedef void (*PrefsParserPrefFn)(const char* aPrefName,
+ PrefType aType,
+ PrefValueKind aKind,
+ PrefValue aValue,
+ bool aIsSticky);
+
+// Keep this in sync with ErrorFn in parser/src/lib.rs.
+//
+// `aMsg` is just a borrow of the string, and must be copied if it is used
+// outside the lifetime of the prefs_parser_parse() call.
+typedef void (*PrefsParserErrorFn)(const char* aMsg);
+
+// Keep this in sync with prefs_parser_parse() in parser/src/lib.rs.
+bool
+prefs_parser_parse(const char* aPath, // null-terminated name of the source being parsed
+ const char* aBuf, // null-terminated contents of the prefs file
+ size_t aLen, // length of aBuf, excluding the null terminator
+ PrefsParserPrefFn aPrefFn,
+ PrefsParserErrorFn aErrorFn);
+}
+
class Parser
{
public:
- Parser()
- : mState()
- , mNextState()
- , mStrMatch()
- , mStrIndex()
- , mUtf16()
- , mEscLen()
- , mEscTmp()
- , mQuoteChar()
- , mLb()
- , mLbCur()
- , mLbEnd()
- , mVb()
- , mVtype()
- , mIsDefault()
- , mIsSticky()
+ Parser() = default;
+ ~Parser() = default;
+
+ bool Parse(const char* aPath, nsCString& aBuf)
{
+ return prefs_parser_parse( // implemented in Rust, see parser/src/lib.rs
+ aPath, aBuf.get(), aBuf.Length(), HandlePref, HandleError);
}
- ~Parser() { free(mLb); }
-
- bool Parse(const char* aBuf, int aBufLen);
-
- bool GrowBuf();
-
- void HandleValue(const char* aPrefName,
- PrefType aType,
- PrefValue aValue,
- bool aIsDefault,
- bool aIsSticky);
-
- void ReportProblem(const char* aMessage, int aLine, bool aError);
-
private:
- // Pref parser states.
- enum class State
+ static void HandlePref(const char* aPrefName,
+ PrefType aType,
+ PrefValueKind aKind,
+ PrefValue aValue,
+ bool aIsSticky)
{
- eInit,
- eMatchString,
- eUntilName,
- eQuotedString,
- eUntilComma,
- eUntilValue,
- eIntValue,
- eCommentMaybeStart,
- eCommentBlock,
- eCommentBlockMaybeEnd,
- eEscapeSequence,
- eHexEscape,
- eUTF16LowSurrogate,
- eUntilOpenParen,
- eUntilCloseParen,
- eUntilSemicolon,
- eUntilEOL
- };
-
- static const int kUTF16EscapeNumDigits = 4;
- static const int kHexEscapeNumDigits = 2;
- static const int KBitsPerHexDigit = 4;
-
- static constexpr const char* kUserPref = "user_pref";
- static constexpr const char* kPref = "pref";
- static constexpr const char* kStickyPref = "sticky_pref";
- static constexpr const char* kTrue = "true";
- static constexpr const char* kFalse = "false";
-
- State mState; // current parse state
- State mNextState; // sometimes used...
- const char* mStrMatch; // string to match
- int mStrIndex; // next char of smatch to check;
- // also, counter in \u parsing
- char16_t mUtf16[2]; // parsing UTF16 (\u) escape
- int mEscLen; // length in mEscTmp
- char mEscTmp[6]; // raw escape to put back if err
- char mQuoteChar; // char delimiter for quotations
- char* mLb; // line buffer (only allocation)
- char* mLbCur; // line buffer cursor
- char* mLbEnd; // line buffer end
- char* mVb; // value buffer (ptr into mLb)
- Maybe<PrefType> mVtype; // pref value type
- bool mIsDefault; // true if (default) pref
- bool mIsSticky; // true if (sticky) pref
-};
-
-// This function will increase the size of the buffer owned by the given pref
-// parse state. We currently use a simple doubling algorithm, but the only hard
-// requirement is that it increase the buffer by at least the size of the
-// mEscTmp buffer used for escape processing (currently 6 bytes).
-//
-// The buffer is used to store partial pref lines. It is freed when the parse
-// state is destroyed.
-//
-// This function updates all pointers that reference an address within mLb
-// since realloc may relocate the buffer.
-//
-// Returns false on failure.
-bool
-Parser::GrowBuf()
-{
- int bufLen, curPos, valPos;
-
- bufLen = mLbEnd - mLb;
- curPos = mLbCur - mLb;
- valPos = mVb - mLb;
-
- if (bufLen == 0) {
- bufLen = 128; // default buffer size
- } else {
- bufLen <<= 1; // double buffer size
+ pref_SetPref(
+ aPrefName, aType, aKind, aValue, aIsSticky, /* fromFile */ true);
}
- mLb = (char*)realloc(mLb, bufLen);
- if (!mLb) {
- return false;
- }
-
- mLbCur = mLb + curPos;
- mLbEnd = mLb + bufLen;
- mVb = mLb + valPos;
-
- return true;
-}
-
-void
-Parser::HandleValue(const char* aPrefName,
- PrefType aType,
- PrefValue aValue,
- bool aIsDefault,
- bool aIsSticky)
-{
- PrefValueKind kind =
- aIsDefault ? PrefValueKind::Default : PrefValueKind::User;
- pref_SetPref(aPrefName, aType, kind, aValue, aIsSticky, /* fromFile */ true);
-}
-
-// Report an error or a warning. If not specified, just dump to stderr.
-void
-Parser::ReportProblem(const char* aMessage, int aLine, bool aError)
-{
- nsPrintfCString message("** Preference parsing %s (line %d) = %s **\n",
- (aError ? "error" : "warning"),
- aLine,
- aMessage);
- nsresult rv;
- nsCOMPtr<nsIConsoleService> console =
- do_GetService("@mozilla.org/consoleservice;1", &rv);
- if (NS_SUCCEEDED(rv)) {
- console->LogStringMessage(NS_ConvertUTF8toUTF16(message).get());
- } else {
- printf_stderr("%s", message.get());
- }
-}
-
-// Parse a buffer containing some portion of a preference file. This function
-// may be called repeatedly as new data is made available. The PrefReader
-// callback function passed to Parser's constructor will be called as preference
-// name value pairs are extracted from the data. Returns false if buffer
-// contains malformed content.
-//
-// Pseudo-BNF
-// ----------
-// function = LJUNK function-name JUNK function-args
-// function-name = "user_pref" | "pref" | "sticky_pref"
-// function-args = "(" JUNK pref-name JUNK "," JUNK pref-value JUNK ")" JUNK ";"
-// pref-name = quoted-string
-// pref-value = quoted-string | "true" | "false" | integer-value
-// JUNK = *(WS | comment-block | comment-line)
-// LJUNK = *(WS | comment-block | comment-line | bcomment-line)
-// WS = SP | HT | LF | VT | FF | CR
-// SP = <US-ASCII SP, space (32)>
-// HT = <US-ASCII HT, horizontal-tab (9)>
-// LF = <US-ASCII LF, linefeed (10)>
-// VT = <US-ASCII HT, vertical-tab (11)>
-// FF = <US-ASCII FF, form-feed (12)>
-// CR = <US-ASCII CR, carriage return (13)>
-// comment-block = <C/C++ style comment block>
-// comment-line = <C++ style comment line>
-// bcomment-line = <bourne-shell style comment line>
-//
-bool
-Parser::Parse(const char* aBuf, int aBufLen)
-{
- // The line number is currently only used for the error/warning reporting.
- int lineNum = 0;
-
- State state = mState;
- for (const char* end = aBuf + aBufLen; aBuf != end; ++aBuf) {
- char c = *aBuf;
- if (c == '\r' || c == '\n' || c == 0x1A) {
- lineNum++;
+ static void HandleError(const char* aMsg)
+ {
+ nsresult rv;
+ nsCOMPtr<nsIConsoleService> console =
+ do_GetService("@mozilla.org/consoleservice;1", &rv);
+ if (NS_SUCCEEDED(rv)) {
+ console->LogStringMessage(NS_ConvertUTF8toUTF16(aMsg).get());
+ } else {
+ printf_stderr("%s\n", aMsg);
}
-
- switch (state) {
- // initial state
- case State::eInit:
- if (mLbCur != mLb) { // reset state
- mLbCur = mLb;
- mVb = nullptr;
- mVtype = Nothing();
- mIsDefault = false;
- mIsSticky = false;
- }
- switch (c) {
- case '/': // begin comment block or line?
- state = State::eCommentMaybeStart;
- break;
- case '#': // accept shell style comments
- state = State::eUntilEOL;
- break;
- case 'u': // indicating user_pref
- case 's': // indicating sticky_pref
- case 'p': // indicating pref
- if (c == 'u') {
- mStrMatch = kUserPref;
- } else if (c == 's') {
- mStrMatch = kStickyPref;
- } else {
- mStrMatch = kPref;
- }
- mStrIndex = 1;
- mNextState = State::eUntilOpenParen;
- state = State::eMatchString;
- break;
- // else skip char
- }
- break;
-
- // string matching
- case State::eMatchString:
- if (c == mStrMatch[mStrIndex++]) {
- // If we've matched all characters, then move to next state.
- if (mStrMatch[mStrIndex] == '\0') {
- state = mNextState;
- mNextState = State::eInit; // reset next state
- }
- // else wait for next char
- } else {
- ReportProblem("non-matching string", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- // quoted string parsing
- case State::eQuotedString:
- // we assume that the initial quote has already been consumed
- if (mLbCur == mLbEnd && !GrowBuf()) {
- return false; // out of memory
- }
- if (c == '\\') {
- state = State::eEscapeSequence;
- } else if (c == mQuoteChar) {
- *mLbCur++ = '\0';
- state = mNextState;
- mNextState = State::eInit; // reset next state
- } else {
- *mLbCur++ = c;
- }
- break;
-
- // name parsing
- case State::eUntilName:
- if (c == '\"' || c == '\'') {
- mIsDefault = (mStrMatch == kPref || mStrMatch == kStickyPref);
- mIsSticky = (mStrMatch == kStickyPref);
- mQuoteChar = c;
- mNextState = State::eUntilComma; // return here when done
- state = State::eQuotedString;
- } else if (c == '/') { // allow embedded comment
- mNextState = state; // return here when done with comment
- state = State::eCommentMaybeStart;
- } else if (!isspace(c)) {
- ReportProblem("need space, comment or quote", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- // parse until we find a comma separating name and value
- case State::eUntilComma:
- if (c == ',') {
- mVb = mLbCur;
- state = State::eUntilValue;
- } else if (c == '/') { // allow embedded comment
- mNextState = state; // return here when done with comment
- state = State::eCommentMaybeStart;
- } else if (!isspace(c)) {
- ReportProblem("need space, comment or comma", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- // value parsing
- case State::eUntilValue:
- // The pref value type is unknown. So, we scan for the first character
- // of the value, and determine the type from that.
- if (c == '\"' || c == '\'') {
- mVtype = Some(PrefType::String);
- mQuoteChar = c;
- mNextState = State::eUntilCloseParen;
- state = State::eQuotedString;
- } else if (c == 't' || c == 'f') {
- mVb = (char*)(c == 't' ? kTrue : kFalse);
- mVtype = Some(PrefType::Bool);
- mStrMatch = mVb;
- mStrIndex = 1;
- mNextState = State::eUntilCloseParen;
- state = State::eMatchString;
- } else if (isdigit(c) || (c == '-') || (c == '+')) {
- mVtype = Some(PrefType::Int);
- // write c to line buffer...
- if (mLbCur == mLbEnd && !GrowBuf()) {
- return false; // out of memory
- }
- *mLbCur++ = c;
- state = State::eIntValue;
- } else if (c == '/') { // allow embedded comment
- mNextState = state; // return here when done with comment
- state = State::eCommentMaybeStart;
- } else if (!isspace(c)) {
- ReportProblem("need value, comment or space", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- case State::eIntValue:
- // grow line buffer if necessary...
- if (mLbCur == mLbEnd && !GrowBuf()) {
- return false; // out of memory
- }
- if (isdigit(c)) {
- *mLbCur++ = c;
- } else {
- *mLbCur++ = '\0'; // stomp null terminator; we are done.
- if (c == ')') {
- state = State::eUntilSemicolon;
- } else if (c == '/') { // allow embedded comment
- mNextState = State::eUntilCloseParen;
- state = State::eCommentMaybeStart;
- } else if (isspace(c)) {
- state = State::eUntilCloseParen;
- } else {
- ReportProblem("while parsing integer", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- }
- break;
-
- // comment parsing
- case State::eCommentMaybeStart:
- switch (c) {
- case '*': // comment block
- state = State::eCommentBlock;
- break;
- case '/': // comment line
- state = State::eUntilEOL;
- break;
- default:
- // pref file is malformed
- ReportProblem("while parsing comment", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- case State::eCommentBlock:
- if (c == '*') {
- state = State::eCommentBlockMaybeEnd;
- }
- break;
-
- case State::eCommentBlockMaybeEnd:
- switch (c) {
- case '/':
- state = mNextState;
- mNextState = State::eInit;
- break;
- case '*': // stay in this state
- break;
- default:
- state = State::eCommentBlock;
- break;
- }
- break;
-
- // string escape sequence parsing
- case State::eEscapeSequence:
- // It's not necessary to resize the buffer here since we should be
- // writing only one character and the resize check would have been done
- // for us in the previous state.
- switch (c) {
- case '\"':
- case '\'':
- case '\\':
- break;
- case 'r':
- c = '\r';
- break;
- case 'n':
- c = '\n';
- break;
- case 'x': // hex escape -- always interpreted as Latin-1
- case 'u': // UTF16 escape
- mEscTmp[0] = c;
- mEscLen = 1;
- mUtf16[0] = mUtf16[1] = 0;
- mStrIndex =
- (c == 'x') ? kHexEscapeNumDigits : kUTF16EscapeNumDigits;
- state = State::eHexEscape;
- continue;
- default:
- ReportProblem(
- "preserving unexpected JS escape sequence", lineNum, false);
- NS_WARNING("preserving unexpected JS escape sequence");
- // Invalid escape sequence so we do have to write more than one
- // character. Grow line buffer if necessary...
- if ((mLbCur + 1) == mLbEnd && !GrowBuf()) {
- return false; // out of memory
- }
- *mLbCur++ = '\\'; // preserve the escape sequence
- break;
- }
- *mLbCur++ = c;
- state = State::eQuotedString;
- break;
-
- // parsing a hex (\xHH) or mUtf16 escape (\uHHHH)
- case State::eHexEscape: {
- char udigit;
- if (c >= '0' && c <= '9') {
- udigit = (c - '0');
- } else if (c >= 'A' && c <= 'F') {
- udigit = (c - 'A') + 10;
- } else if (c >= 'a' && c <= 'f') {
- udigit = (c - 'a') + 10;
- } else {
- // bad escape sequence found, write out broken escape as-is
- ReportProblem(
- "preserving invalid or incomplete hex escape", lineNum, false);
- NS_WARNING("preserving invalid or incomplete hex escape");
- *mLbCur++ = '\\'; // original escape slash
- if ((mLbCur + mEscLen) >= mLbEnd && !GrowBuf()) {
- return false;
- }
- for (int i = 0; i < mEscLen; ++i) {
- *mLbCur++ = mEscTmp[i];
- }
-
- // Push the non-hex character back for re-parsing. (++aBuf at the top
- // of the loop keeps this safe.)
- --aBuf;
- state = State::eQuotedString;
- continue;
- }
-
- // have a digit
- mEscTmp[mEscLen++] = c; // preserve it
- mUtf16[1] <<= KBitsPerHexDigit;
- mUtf16[1] |= udigit;
- mStrIndex--;
- if (mStrIndex == 0) {
- // we have the full escape, convert to UTF8
- int utf16len = 0;
- if (mUtf16[0]) {
- // already have a high surrogate, this is a two char seq
- utf16len = 2;
- } else if (0xD800 == (0xFC00 & mUtf16[1])) {
- // a high surrogate, can't convert until we have the low
- mUtf16[0] = mUtf16[1];
- mUtf16[1] = 0;
- state = State::eUTF16LowSurrogate;
- break;
- } else {
- // a single mUtf16 character
- mUtf16[0] = mUtf16[1];
- utf16len = 1;
- }
-
- // The actual conversion.
- // Make sure there's room, 6 bytes is max utf8 len (in theory; 4
- // bytes covers the actual mUtf16 range).
- if (mLbCur + 6 >= mLbEnd && !GrowBuf()) {
- return false;
- }
-
- ConvertUTF16toUTF8 converter(mLbCur);
- converter.write(mUtf16, utf16len);
- mLbCur += converter.Size();
- state = State::eQuotedString;
- }
- break;
- }
-
- // looking for beginning of mUtf16 low surrogate
- case State::eUTF16LowSurrogate:
- if (mStrIndex == 0 && c == '\\') {
- ++mStrIndex;
- } else if (mStrIndex == 1 && c == 'u') {
- // escape sequence is correct, now parse hex
- mStrIndex = kUTF16EscapeNumDigits;
- mEscTmp[0] = 'u';
- mEscLen = 1;
- state = State::eHexEscape;
- } else {
- // Didn't find expected low surrogate. Ignore high surrogate (it
- // would just get converted to nothing anyway) and start over with
- // this character.
- --aBuf;
- if (mStrIndex == 1) {
- state = State::eEscapeSequence;
- } else {
- state = State::eQuotedString;
- }
- continue;
- }
- break;
-
- // function open and close parsing
- case State::eUntilOpenParen:
- // tolerate only whitespace and embedded comments
- if (c == '(') {
- state = State::eUntilName;
- } else if (c == '/') {
- mNextState = state; // return here when done with comment
- state = State::eCommentMaybeStart;
- } else if (!isspace(c)) {
- ReportProblem(
- "need space, comment or open parentheses", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- case State::eUntilCloseParen:
- // tolerate only whitespace and embedded comments
- if (c == ')') {
- state = State::eUntilSemicolon;
- } else if (c == '/') {
- mNextState = state; // return here when done with comment
- state = State::eCommentMaybeStart;
- } else if (!isspace(c)) {
- ReportProblem(
- "need space, comment or closing parentheses", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- // function terminator ';' parsing
- case State::eUntilSemicolon:
- // tolerate only whitespace and embedded comments
- if (c == ';') {
-
- PrefValue value;
-
- switch (*mVtype) {
- case PrefType::String:
- value.mStringVal = mVb;
- break;
-
- case PrefType::Int:
- if ((mVb[0] == '-' || mVb[0] == '+') && mVb[1] == '\0') {
- ReportProblem("invalid integer value", 0, true);
- NS_WARNING("malformed integer value");
- return false;
- }
- value.mIntVal = atoi(mVb);
- break;
-
- case PrefType::Bool:
- value.mBoolVal = (mVb == kTrue);
- break;
-
- default:
- MOZ_CRASH();
- }
-
- // We've extracted a complete name/value pair.
- HandleValue(mLb, *mVtype, value, mIsDefault, mIsSticky);
-
- state = State::eInit;
- } else if (c == '/') {
- mNextState = state; // return here when done with comment
- state = State::eCommentMaybeStart;
- } else if (!isspace(c)) {
- ReportProblem("need space, comment or semicolon", lineNum, true);
- NS_WARNING("malformed pref file");
- return false;
- }
- break;
-
- // eol parsing
- case State::eUntilEOL:
- // Need to handle mac, unix, or dos line endings. State::eInit will
- // eat the next \n in case we have \r\n.
- if (c == '\r' || c == '\n' || c == 0x1A) {
- state = mNextState;
- mNextState = State::eInit; // reset next state
- }
- break;
- }
+ NS_WARNING(aMsg);
}
- mState = state;
- return true;
+};
+
+// The following code is test code for the gtest.
+
+static void
+TestParseErrorHandlePref(const char* aPrefName,
+ PrefType aType,
+ PrefValueKind aKind,
+ PrefValue aValue,
+ bool aIsSticky)
+{ // no-op: every pref handed to this callback is discarded
+}
+
+static char* gTestParseErrorMsg; // copy made by TestParseErrorHandleError(); freed and reset in TestParseError()
+
+static void
+TestParseErrorHandleError(const char* aMsg)
+{
+ free(gTestParseErrorMsg); // release any copy left by an earlier callback so it cannot leak
+ gTestParseErrorMsg = moz_xstrdup(aMsg); // aMsg's lifetime is shorter than we need, so duplicate it
+}
+
+// Keep this in sync with the declaration in test/gtest/Parser.cpp.
+void
+TestParseError(const char* aText, nsCString& aErrorMsg)
+{
+ prefs_parser_parse("test", // fixed name passed as the path argument
+ aText,
+ strlen(aText), // length excludes the null terminator
+ TestParseErrorHandlePref,
+ TestParseErrorHandleError); // the bool result is not checked
+
+ // Copy the duplicated error message into the outparam, then free it.
+ aErrorMsg.Assign(gTestParseErrorMsg); // NOTE(review): still null if no error was reported — confirm Assign() accepts that
+ free(gTestParseErrorMsg);
+ gTestParseErrorMsg = nullptr;
}
//===========================================================================
// nsPrefBranch et al.
//===========================================================================
namespace mozilla {
class PreferenceServiceReporter;
@@ -3951,18 +3438,21 @@ Preferences::WritePrefFile(nsIFile* aFil
}
static nsresult
openPrefFile(nsIFile* aFile)
{
nsCString data;
MOZ_TRY_VAR(data, URLPreloader::ReadFile(aFile));
+ nsAutoString path;
+ aFile->GetPath(path); // NOTE(review): result is ignored — confirm that parsing with an empty path on failure is acceptable
+
Parser parser;
- if (!parser.Parse(data.get(), data.Length())) {
+ if (!parser.Parse(NS_ConvertUTF16toUTF8(path).get(), data)) {
return NS_ERROR_FILE_CORRUPTED;
}
return NS_OK;
}
static int
pref_CompareFileNames(nsIFile* aFile1, nsIFile* aFile2, void* /* unused */)
@@ -4084,17 +3574,17 @@ pref_LoadPrefsInDir(nsIFile* aDir,
static nsresult
pref_ReadPrefFromJar(nsZipArchive* aJarReader, const char* aName)
{
nsCString manifest;
MOZ_TRY_VAR(manifest,
URLPreloader::ReadZip(aJarReader, nsDependentCString(aName)));
Parser parser;
- parser.Parse(manifest.get(), manifest.Length());
+ parser.Parse(aName, manifest); // aName doubles as the path argument; the bool result is not checked
return NS_OK;
}
// Initialize default preference JavaScript buffers from appropriate TEXT
// resources.
/* static */ Result<Ok, const char*>
Preferences::InitInitialObjects()
new file mode 100644
--- /dev/null
+++ b/modules/libpref/parser/src/lib.rs
@@ -0,0 +1,790 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! This crate implements a prefs file parser.
+//!
+//! Pref files have the following grammar.
+//!
+//! <pref-file> = <pref>*
+//! <pref> = <pref-spec> "(" <pref-name> "," <pref-value> ")" ";"
+//! <pref-spec> = "user_pref" | "pref" | "sticky_pref"
+//! <pref-name> = <string-literal>
+//! <pref-value> = <string-literal> | "true" | "false" | <int-value>
+//! <int-value> = <sign>? <int-literal>
+//! <sign> = "+" | "-"
+//! <int-literal> = [0-9]+ (and cannot be followed by [A-Za-z_])
+//! <string-literal> =
+//! A single or double-quoted string, with the following escape sequences
+//! allowed: \", \', \\, \n, \r, \xNN, \uNNNN, where \xNN gives a raw byte
+//! value that is copied directly into an 8-bit string value, and \uNNNN
+//! gives a UTF-16 code unit that is converted to UTF-8 before being copied
+//! into an 8-bit string value. \x00 and \u0000 are disallowed because they
+//! would cause C++ code handling such strings to misbehave.
+//!
+//! Comments can take three forms:
+//! - # Python-style comments
+//! - // C++ style comments
+//! - /* C style comments (non-nested) */
+//!
+//! Non-end-of-line whitespace chars are \t, \v, \f, and space.
+//!
+//! End-of-line sequences can take three forms, each of which is considered as a
+//! single EoL:
+//! - \n
+//! - \r (without subsequent \n)
+//! - \r\n
+//!
+//! The valid range for <int-value> is -2,147,483,648..2,147,483,647. Values
+//! outside that range will result in a parse error.
+//!
+//! A '\0' char is interpreted as the end of the file. The use of this character
+//! in a prefs file is not recommended. The \x00 and \u0000 escapes cannot be
+//! used as a substitute within string literals; they are disallowed (see above).
+
+// This parser uses several important optimizations.
+//
+// - Because "'\0' means EOF" is part of the grammar (see above) we can match
+// EOF as a normal char/token, which means we can avoid a typical "do we
+// still have chars remaining?" test in get_char(), which gives a speedup
+// because get_char() is a very hot function. (Actually, Rust would
+// bounds-check this function anyway, so we have get_char_unchecked() which
+// is used for the two hottest call sites.)
+//
+// This also means EOF is representable by a u8. If EOF was represented by an
+// out-of-band value such as -1 or 256, we'd have to return a larger type such
+// as u16 or i16 from get_char().
+//
+// - When starting a new token, it uses a lookup table with the first char,
+// which quickly identifies what kind of token it will be. Furthermore, if
+// that token is an unambiguous single-char token (e.g. '(', ')', '+', ',',
+// '-', ';'), the parser will return the appropriate token kind value at
+// minimal cost because the single-char tokens have a uniform representation.
+//
+// - It has a lookup table that identifies chars in string literals that need
+// special handling. This means non-special chars (the common case) can be
+// handled with a single test, rather than testing for the multiple special
+// cases.
+//
+// - It pre-scans string literals for special chars. If none are present, it
+// bulk copies the string literal into a Vec, which is faster than doing a
+// char-by-char copy.
+//
+// - It reuses Vecs to avoid creating a new one for each string literal.
+
+use std::os::raw::{c_char, c_uchar};
+
+//---------------------------------------------------------------------------
+// The public interface
+//---------------------------------------------------------------------------
+
+/// Keep this in sync with PrefType in Preferences.cpp.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)] // matches `enum class PrefType : uint8_t` on the C++ side
+pub enum PrefType {
+ None, // 0; C++ PrefType::None
+ String, // 1; C++ PrefType::String
+ Int, // 2; C++ PrefType::Int
+ Bool, // 3; C++ PrefType::Bool
+}
+
+/// Keep this in sync with PrefValueKind in Preferences.h.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)] // NOTE(review): the C++ PrefValueKind must also be one byte wide — confirm in Preferences.h
+pub enum PrefValueKind {
+ Default, // discriminant 0
+ User // discriminant 1
+}
+
+/// Keep this in sync with PrefValue in Preferences.cpp.
+#[repr(C)]
+pub union PrefValue {
+ string_val: *const c_char, // C++ mStringVal
+ int_val: i32, // C++ mIntVal
+ bool_val: bool, // C++ mBoolVal
+}
+
+/// Keep this in sync with PrefsParserPrefFn in Preferences.cpp.
+type PrefFn = unsafe extern "C" fn(pref_name: *const c_char, pref_type: PrefType,
+ pref_value_kind: PrefValueKind, pref_value: PrefValue,
+ is_sticky: bool); // parameter order mirrors PrefsParserPrefFn
+
+/// Keep this in sync with PrefsParserErrorFn in Preferences.cpp.
+type ErrorFn = unsafe extern "C" fn(msg: *const c_char); // `msg` is only borrowed; the callee must copy it to keep it past the parse call
+
+/// Parse the contents of a prefs file.
+///
+/// `buf` is a null-terminated string. `len` is its length, excluding the
+/// null terminator. `path` must be null-terminated as well.
+///
+/// Keep this in sync with prefs_parser_parse() in Preferences.cpp.
+#[no_mangle]
+pub extern "C" fn prefs_parser_parse(path: *const c_char, buf: *const c_char, len: usize,
+ pref_fn: PrefFn, error_fn: ErrorFn) -> bool {
+ let path = unsafe { std::ffi::CStr::from_ptr(path).to_string_lossy().into_owned() }; // owned copy; invalid UTF-8 is replaced, not rejected
+
+ // Make sure `buf` ends in a '\0', and include that in the length, because
+ // it represents EOF.
+ let buf = unsafe { std::slice::from_raw_parts(buf as *const c_uchar, len + 1) };
+ assert!(buf.last() == Some(&EOF)); // panics unless the byte at index `len` is EOF
+
+ let mut parser = Parser::new(&path, &buf, pref_fn, error_fn);
+ parser.parse()
+}
+
+//---------------------------------------------------------------------------
+// The implementation
+//---------------------------------------------------------------------------
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Token {
+ // Unambiguous single-char tokens.
+ SingleChar(u8),
+
+ // Keywords
+ Pref, // pref
+ StickyPref, // sticky_pref
+ UserPref, // user_pref
+ True, // true
+ False, // false
+
+ // String literal, e.g. '"string"'. The value is stored elsewhere.
+ String,
+
+ // Unsigned integer literal, e.g. '123'. Although libpref uses i32 values,
+ // any '-' and '+' before an integer literal are treated as separate
+ // tokens, so these token values are never negative. Furthermore, we
+ // tokenize int literals as u32 so that 2147483648 (which doesn't fit into
+ // an i32) can be subsequently negated to -2147483648 (which does fit into
+ // an i32) if a '-' token precedes it.
+ Int(u32),
+
+ // Malformed token.
+ Error(&'static str),
+}
+
+// We categorize every char by what action should be taken when it appears at
+// the start of a new token.
+#[derive(Clone, Copy, PartialEq)]
+enum CharKind {
+ // These are ordered by frequency. See the comment in GetToken().
+ SingleChar, // Unambiguous single-char tokens: [()+,-;] and '\0' (EOF)
+ SpaceNL, // [\t\v\f \n]
+ Keyword, // [A-Za-z_]
+ Quote, // ["']
+ Slash, // /
+ Digit, // [0-9]
+ Hash, // #
+ CR, // \r
+ Other // Everything else; invalid except within strings and comments.
+}
+
+const C_SINGL: CharKind = CharKind::SingleChar; // fixed-width aliases used as CHAR_KINDS table entries
+const C_SPCNL: CharKind = CharKind::SpaceNL;
+const C_KEYWD: CharKind = CharKind::Keyword;
+const C_QUOTE: CharKind = CharKind::Quote;
+const C_SLASH: CharKind = CharKind::Slash;
+const C_DIGIT: CharKind = CharKind::Digit;
+const C_HASH : CharKind = CharKind::Hash;
+const C_CR : CharKind = CharKind::CR;
+const C______: CharKind = CharKind::Other;
+
+const CHAR_KINDS: [CharKind; 256] = [ // one entry per byte value: the CharKind used when that byte starts a token
+/* 0 1 2 3 4 5 6 7 8 9 */
+/* 0+ */ C_SINGL, C______, C______, C______, C______, C______, C______, C______, C______, C_SPCNL,
+/* 10+ */ C_SPCNL, C_SPCNL, C_SPCNL, C_CR , C______, C______, C______, C______, C______, C______,
+/* 20+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 30+ */ C______, C______, C_SPCNL, C______, C_QUOTE, C_HASH , C______, C______, C______, C_QUOTE,
+/* 40+ */ C_SINGL, C_SINGL, C______, C_SINGL, C_SINGL, C_SINGL, C______, C_SLASH, C_DIGIT, C_DIGIT,
+/* 50+ */ C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C______, C_SINGL,
+/* 60+ */ C______, C______, C______, C______, C______, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD,
+/* 70+ */ C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD,
+/* 80+ */ C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD,
+/* 90+ */ C_KEYWD, C______, C______, C______, C______, C_KEYWD, C______, C_KEYWD, C_KEYWD, C_KEYWD,
+/* 100+ */ C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD,
+/* 110+ */ C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD, C_KEYWD,
+/* 120+ */ C_KEYWD, C_KEYWD, C_KEYWD, C______, C______, C______, C______, C______, C______, C______,
+/* 130+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 140+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 150+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 160+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 170+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 180+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 190+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 200+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 210+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 220+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 230+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 240+ */ C______, C______, C______, C______, C______, C______, C______, C______, C______, C______,
+/* 250+ */ C______, C______, C______, C______, C______, C______
+];
+
+// Table-friendly alias for `false`, so that the `true` entries stand out in
+// the table below.
+const _______: bool = false;
+// Indexed by byte value. An entry is `true` for the bytes that string-literal
+// scanning cannot simply copy through: 0 (EOF), 10 ('\n'), 13 ('\r'),
+// 34 ('"'), 39 ('\'') and 92 ('\\'). Every other entry is `false`.
+const SPECIAL_STRING_CHARS: [bool; 256] = [
+/* 0 1 2 3 4 5 6 7 8 9 */
+/* 0+ */ true, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 10+ */ true, _______, _______, true, _______, _______, _______, _______, _______, _______,
+/* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 30+ */ _______, _______, _______, _______, true, _______, _______, _______, _______, true,
+/* 40+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 50+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 60+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 70+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 80+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 90+ */ _______, _______, true, _______, _______, _______, _______, _______, _______, _______,
+/* 100+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 110+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 120+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 130+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 140+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 150+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 160+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 170+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 180+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 190+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 200+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 210+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 220+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 230+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 240+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/* 250+ */ _______, _______, _______, _______, _______, _______
+];
+
+// Pairs a keyword's spelling with the token the lexer returns for it.
+struct KeywordInfo {
+ string: &'static [u8], // The keyword's exact bytes, e.g. b"pref".
+ token: Token, // The token returned when the lexed text equals `string`.
+}
+
+// Every keyword the lexer recognizes, paired with its token. The lexer walks
+// this table front to back, so the entries are ordered by frequency.
+const KEYWORD_INFOS: &[KeywordInfo; 5] = &[
+    KeywordInfo {
+        string: b"pref",
+        token: Token::Pref,
+    },
+    KeywordInfo {
+        string: b"true",
+        token: Token::True,
+    },
+    KeywordInfo {
+        string: b"false",
+        token: Token::False,
+    },
+    KeywordInfo {
+        string: b"user_pref",
+        token: Token::UserPref,
+    },
+    KeywordInfo {
+        string: b"sticky_pref",
+        token: Token::StickyPref,
+    },
+];
+
+// All the state needed to lex and parse one prefs text: the input, the read
+// position within it, and the callbacks through which results and errors are
+// reported. The lifetime 't ties the parser to the borrowed path and text.
+struct Parser<'t> {
+ path: &'t str, // Path to the file being parsed. Used in error messages.
+ buf: &'t [u8], // Text being parsed.
+ i: usize, // Index of next char to be read.
+ line_num: u32, // Current line number within the text.
+ pref_fn: PrefFn, // Callback for processing each pref.
+ error_fn: ErrorFn, // Callback for parse errors.
+}
+
+// As described above, we use 0 to represent EOF. The lexer relies on reaching
+// this byte to know where the text ends, including in the places where it
+// reads without a bounds check.
+const EOF: u8 = b'\0';
+
+impl<'t> Parser<'t> {
+ // Builds a parser positioned at the first byte of `buf`, with the line
+ // counter at 1. `path` only labels error messages; `pref_fn` and `error_fn`
+ // are stored for later use as the per-pref and parse-error callbacks.
+ fn new(path: &'t str, buf: &'t [u8], pref_fn: PrefFn, error_fn: ErrorFn) -> Parser<'t> {
+     // The lexer's lookup tables are expected to cost exactly 1 byte per
+     // entry.
+     assert!(std::mem::size_of_val(&CHAR_KINDS) == 256);
+     assert!(std::mem::size_of_val(&SPECIAL_STRING_CHARS) == 256);
+
+     let i = 0;
+     let line_num = 1;
+     Parser { path, buf, i, line_num, pref_fn, error_fn }
+ }
+
+ // Parses the entire text, calling `pref_fn` once for each well-formed pref
+ // definition, in order. Returns true when EOF is reached cleanly. On the
+ // first malformed definition the problem is reported via `error()` and
+ // false is returned; nothing after that point is looked at.
+ fn parse(&mut self) -> bool {
+     // Scratch buffers, created once and reused for every token, because
+     // allocating a new Vec for every string is slow.
+     let mut name_buf = Vec::with_capacity(128); // For pref names.
+     let mut value_buf = Vec::with_capacity(512); // For string pref values.
+     let mut unused_buf = Vec::with_capacity(0); // For tokens that shouldn't be strings.
+
+     // The largest literal magnitude that fits in an i32 as-is.
+     let max_positive = std::i32::MAX as u32;
+
+     loop {
+         // Note: if you add error recovery here, be aware that the
+         // erroneous char may have been the text-ending EOF, in which case
+         // self.i will point one past the end of the text. You should check
+         // for that possibility before getting more chars.
+
+         // <pref-spec>, or EOF when there are no more definitions.
+         let spec = self.get_token(&mut unused_buf);
+         let (pref_value_kind, is_sticky) = match spec {
+             Token::SingleChar(EOF) => return true,
+             Token::Pref => (PrefValueKind::Default, false),
+             Token::StickyPref => (PrefValueKind::Default, true),
+             Token::UserPref => (PrefValueKind::User, false),
+             _ => {
+                 return self.error(spec, "expected pref specifier at start of pref definition");
+             }
+         };
+
+         // "("
+         let open_paren = self.get_token(&mut unused_buf);
+         match open_paren {
+             Token::SingleChar(b'(') => {}
+             _ => return self.error(open_paren, "expected '(' after pref specifier"),
+         }
+
+         // <pref-name>; on success its NUL-terminated bytes are in name_buf.
+         let name_tok = self.get_token(&mut name_buf);
+         match name_tok {
+             Token::String => {}
+             _ => return self.error(name_tok, "expected pref name after '('"),
+         }
+
+         // ","
+         let comma = self.get_token(&mut unused_buf);
+         match comma {
+             Token::SingleChar(b',') => {}
+             _ => return self.error(comma, "expected ',' after pref name"),
+         }
+
+         // <pref-value>
+         let value_tok = self.get_token(&mut value_buf);
+         let (pref_type, pref_value) = match value_tok {
+             Token::True => (PrefType::Bool, PrefValue { bool_val: true }),
+             Token::False => (PrefType::Bool, PrefValue { bool_val: false }),
+             Token::String => {
+                 (PrefType::String, PrefValue { string_val: value_buf.as_ptr() as *const c_char })
+             }
+             // Accept u <= 2147483647; anything larger will overflow i32.
+             Token::Int(u) if u <= max_positive => {
+                 (PrefType::Int, PrefValue { int_val: u as i32 })
+             }
+             Token::Int(_) => {
+                 return self.error(Token::Error("integer literal overflowed"), "");
+             }
+             Token::SingleChar(b'-') => {
+                 // Accept u <= 2147483648; anything larger will overflow i32
+                 // once negated.
+                 let magnitude = self.get_token(&mut unused_buf);
+                 match magnitude {
+                     Token::Int(u) if u <= max_positive => {
+                         (PrefType::Int, PrefValue { int_val: -(u as i32) })
+                     }
+                     Token::Int(u) if u == max_positive + 1 => {
+                         (PrefType::Int, PrefValue { int_val: std::i32::MIN })
+                     }
+                     Token::Int(_) => {
+                         return self.error(Token::Error("integer literal overflowed"), "");
+                     }
+                     _ => return self.error(magnitude, "expected integer literal after '-'"),
+                 }
+             }
+             Token::SingleChar(b'+') => {
+                 // Accept u <= 2147483647; anything larger will overflow i32.
+                 let magnitude = self.get_token(&mut unused_buf);
+                 match magnitude {
+                     Token::Int(u) if u <= max_positive => {
+                         (PrefType::Int, PrefValue { int_val: u as i32 })
+                     }
+                     Token::Int(_) => {
+                         return self.error(Token::Error("integer literal overflowed"), "");
+                     }
+                     _ => return self.error(magnitude, "expected integer literal after '+'"),
+                 }
+             }
+             _ => return self.error(value_tok, "expected pref value after ','"),
+         };
+
+         // ")"
+         let close_paren = self.get_token(&mut unused_buf);
+         match close_paren {
+             Token::SingleChar(b')') => {}
+             _ => return self.error(close_paren, "expected ')' after pref value"),
+         }
+
+         // ";"
+         let semicolon = self.get_token(&mut unused_buf);
+         match semicolon {
+             Token::SingleChar(b';') => {}
+             _ => return self.error(semicolon, "expected ';' after ')'"),
+         }
+
+         // The definition is complete; hand it to the callback. name_buf
+         // (and value_buf, for string prefs) stay untouched until the next
+         // iteration, so the pointers passed here are valid for the call.
+         unsafe {
+             (self.pref_fn)(name_buf.as_ptr() as *const c_char, pref_type, pref_value_kind,
+                            pref_value, is_sticky)
+         };
+     }
+ }
+
+ // Reports a failure through `error_fn` and returns false, so that callers
+ // can write `return self.error(...)`.
+ //
+ // A Token::Error carries its own (lexing) error message, which is the one
+ // reported. For any other token it's a parsing error and `msg` is reported.
+ // Either way the message is prefixed with the path and current line number.
+ fn error(&self, token: Token, msg: &str) -> bool {
+     let detail = match token {
+         Token::Error(lex_msg) => lex_msg,
+         _ => msg,
+     };
+     let text = format!("{}:{}: prefs parse error: {}", self.path, self.line_num, detail);
+     let c_text = std::ffi::CString::new(text).unwrap();
+     unsafe { (self.error_fn)(c_text.as_ptr() as *const c_char) };
+
+     false
+ }
+
+ // Returns the byte at the read position and advances past it. The access
+ // is bounds-checked.
+ #[inline(always)]
+ fn get_char(&mut self) -> u8 {
+     let pos = self.i;
+     let byte = self.buf[pos];
+     self.i = pos + 1;
+     byte
+ }
+
+ // Same as get_char(), minus the bounds check; the caller must guarantee
+ // that the read position is inside the text. Using it at the hottest two
+ // call sites gives a ~15% parsing speed boost.
+ #[inline(always)]
+ unsafe fn get_char_unchecked(&mut self) -> u8 {
+     let pos = self.i;
+     let byte = *self.buf.get_unchecked(pos);
+     self.i = pos + 1;
+     byte
+ }
+
+ // Steps the read position back by one, so the most recently read byte will
+ // be read again. Must not be called at the very start of the text (checked
+ // in debug builds only).
+ #[inline(always)]
+ fn unget_char(&mut self) {
+ debug_assert!(self.i > 0);
+ self.i -= 1;
+ }
+
+ // Consumes the next byte only if it equals `c`, and reports whether it did.
+ #[inline(always)]
+ fn match_char(&mut self, c: u8) -> bool {
+     let matched = self.buf[self.i] == c;
+     if matched {
+         self.i += 1;
+     }
+     matched
+ }
+
+ // Skips the rest of a single-line comment, including the line terminator
+ // ('\n', '\r' or "\r\n") that ends it, and bumps the line number for that
+ // terminator. If the comment runs into EOF, the EOF is left unread.
+ #[inline(always)]
+ fn match_single_line_comment(&mut self) {
+     loop {
+         // The caller has already consumed the comment opener, and
+         // assertions elsewhere ensure that there must be at least one
+         // subsequent char (the '\0' for EOF), so the unchecked read is in
+         // bounds.
+         let byte = unsafe { self.get_char_unchecked() };
+
+         // Fast path: all the special chars have value <= b'\r'.
+         if byte > b'\r' {
+             continue;
+         }
+
+         match byte {
+             b'\n' => {
+                 self.line_num += 1;
+                 return;
+             }
+             b'\r' => {
+                 self.line_num += 1;
+                 self.match_char(b'\n');
+                 return;
+             }
+             EOF => {
+                 // We must unget the EOF otherwise we'll read past it the
+                 // next time around the main loop in get_token(), violating
+                 // self.buf's bounds.
+                 self.unget_char();
+                 return;
+             }
+             _ => {}
+         }
+     }
+ }
+
+ // Skips the body of a /* comment, through the closing "*/", counting every
+ // line terminator ('\n', '\r' or "\r\n") crossed on the way. Returns false
+ // if we hit EOF without closing the comment.
+ fn match_multi_line_comment(&mut self) -> bool
+ {
+     loop {
+         let byte = self.get_char();
+         if byte == b'*' {
+             if self.match_char(b'/') {
+                 return true;
+             }
+         } else if byte == b'\n' {
+             self.line_num += 1;
+         } else if byte == b'\r' {
+             self.line_num += 1;
+             self.match_char(b'\n');
+         } else if byte == EOF {
+             return false;
+         }
+     }
+ }
+
+ // Reads `ndigits` (2 or 4) hex digits, upper or lower case, and returns
+ // their combined value. Returns None as soon as a non-hex char is read; that
+ // char has been consumed at that point.
+ fn match_hex_digits(&mut self, ndigits: i32) -> Option<u16> {
+     debug_assert!(ndigits == 2 || ndigits == 4);
+     let mut value: u16 = 0;
+     for _ in 0..ndigits {
+         // A u8 always converts to a char; to_digit(16) accepts exactly
+         // 0-9, a-f and A-F.
+         let digit = match (self.get_char() as char).to_digit(16) {
+             Some(d) => d as u16,
+             None => return None,
+         };
+         value = (value << 4) + digit;
+     }
+     Some(value)
+ }
+
+ // Classifies a byte by looking it up in CHAR_KINDS.
+ #[inline(always)]
+ fn char_kind(c: u8) -> CharKind {
+     let index = c as usize;
+     // A u8 index cannot exceed this table's bounds, so the bounds check can
+     // be skipped.
+     unsafe { *CHAR_KINDS.get_unchecked(index) }
+ }
+
+ // Says whether a byte needs special handling inside a string literal, by
+ // looking it up in SPECIAL_STRING_CHARS.
+ #[inline(always)]
+ fn is_special_string_char(c: u8) -> bool {
+     let index = c as usize;
+     // A u8 index cannot exceed this table's bounds, so the bounds check can
+     // be skipped.
+     unsafe { *SPECIAL_STRING_CHARS.get_unchecked(index) }
+ }
+
+ // Lexes and returns the next token, skipping whitespace and comments.
+ //
+ // If the obtained Token has a value, it is put within the Token, unless
+ // it's a string, in which case it's put in `str_buf`. This avoids
+ // allocating a new Vec for every string, which is slow. Lexing failures are
+ // returned as Token::Error with a message.
+ fn get_token(&mut self, str_buf: &mut Vec<u8>) -> Token {
+     loop {
+         // Note: the following tests are ordered by frequency when parsing
+         // greprefs.js:
+         // - SingleChar 36.7%
+         // - SpaceNL 27.7% (14.9% for spaces, 12.8% for NL)
+         // - Keyword 13.4%
+         // - Quote 11.4%
+         // - Slash 8.1%
+         // - Digit 2.7%
+         // - Hash, CR, Other 0.0%
+         //
+         // Arms that only skip input (whitespace, comments) fall out of the
+         // match, which takes us around the loop again.
+
+         let c = self.get_char();
+         match Parser::char_kind(c) {
+             CharKind::SingleChar => return Token::SingleChar(c),
+             CharKind::SpaceNL => {
+                 // It's slightly faster to combine the handling of the
+                 // space chars with NL than to handle them separately; we
+                 // have an extra test for this case, but one fewer test for
+                 // all the subsequent CharKinds.
+                 if c == b'\n' {
+                     self.line_num += 1;
+                 }
+             }
+             CharKind::Keyword => {
+                 // Consume the whole run of keyword chars, then give back
+                 // the first char that isn't one.
+                 let start = self.i - 1;
+                 while Parser::char_kind(self.get_char()) == CharKind::Keyword {}
+                 self.unget_char();
+
+                 let word = &self.buf[start..self.i];
+                 return KEYWORD_INFOS
+                     .iter()
+                     .find(|info| info.string == word)
+                     .map_or(Token::Error("unknown keyword"), |info| info.token);
+             }
+             CharKind::Quote => return self.get_string_token(c, str_buf),
+             CharKind::Slash => {
+                 match self.get_char() {
+                     b'/' => self.match_single_line_comment(),
+                     b'*' => {
+                         if !self.match_multi_line_comment() {
+                             return Token::Error("unterminated /* comment");
+                         }
+                     }
+                     _ => return Token::Error("expected '/' or '*' after '/'"),
+                 }
+             }
+             CharKind::Digit => {
+                 let mut value = (c - b'0') as u32;
+                 loop {
+                     let d = self.get_char();
+                     match Parser::char_kind(d) {
+                         CharKind::Digit => {
+                             // value * 10 + digit, failing if either step
+                             // overflows a u32.
+                             let next = value
+                                 .checked_mul(10)
+                                 .and_then(|v| v.checked_add((d - b'0') as u32));
+                             match next {
+                                 Some(v) => value = v,
+                                 None => return Token::Error("integer literal overflowed"),
+                             }
+                         }
+                         CharKind::Keyword => {
+                             // Reject things like "123foo".
+                             return Token::Error("unexpected character in integer literal");
+                         }
+                         _ => {
+                             self.unget_char();
+                             return Token::Int(value);
+                         }
+                     }
+                 }
+             }
+             CharKind::Hash => self.match_single_line_comment(),
+             CharKind::CR => {
+                 self.match_char(b'\n');
+                 self.line_num += 1;
+             }
+             _ => return Token::Error("unexpected character"),
+         }
+     }
+ }
+
+ // Lexes a string literal whose opening quote (`quote_char`) has just been
+ // consumed. On success, Token::String is returned and `str_buf` holds the
+ // unescaped contents followed by a terminating '\0'. On failure, a
+ // Token::Error describing the problem is returned.
+ //
+ // Accepted escape sequences are \", \', \\, \n, \r, \xNN and \uNNNN. A \u
+ // escape holding a high surrogate must be followed immediately by a \u
+ // escape holding a low surrogate; the pair is stored as UTF-8. A low
+ // surrogate on its own is rejected, as are \x00 and \u0000 (the result is
+ // '\0'-terminated, so an embedded NUL would truncate it). Raw line
+ // terminators inside the literal are kept and bump the line number.
+ #[inline(always)]
+ fn get_string_token(&mut self, quote_char: u8, str_buf: &mut Vec<u8>) -> Token {
+     // First scan through the string to see if it contains any chars that
+     // need special handling.
+     let start = self.i;
+     let has_special_chars = loop {
+         // To reach here, the previous char must have been a quote
+         // (quote_char), and assertions elsewhere ensure that there must be
+         // at least one subsequent char (the '\0' for EOF).
+         let c = unsafe { self.get_char_unchecked() };
+         if !Parser::is_special_string_char(c) {
+             // Do nothing.
+         } else if c == quote_char {
+             break false;
+         } else {
+             break true;
+         }
+     };
+
+     // Clear str_buf's contents without changing its capacity.
+     str_buf.clear();
+
+     // If there are no special chars (the common case), we can bulk copy it
+     // to str_buf. This is a lot faster than the char-by-char loop below.
+     if !has_special_chars {
+         str_buf.extend(&self.buf[start..self.i - 1]);
+         str_buf.push(b'\0');
+         return Token::String;
+     }
+
+     // There were special chars. Re-scan the string, filling in str_buf one
+     // char at a time.
+     self.i = start;
+     loop {
+         let c = self.get_char();
+         let c2 = if !Parser::is_special_string_char(c) {
+             c
+
+         } else if c == quote_char {
+             break;
+
+         } else if c == b'\\' {
+             match self.get_char() {
+                 b'\"' => b'\"',
+                 b'\'' => b'\'',
+                 b'\\' => b'\\',
+                 b'n' => b'\n',
+                 b'r' => b'\r',
+                 b'x' => {
+                     if let Some(value) = self.match_hex_digits(2) {
+                         debug_assert!(value <= 0xff);
+                         if value != 0 {
+                             value as u8
+                         } else {
+                             return Token::Error("\\x00 is not allowed");
+                         }
+                     } else {
+                         return Token::Error("malformed \\x escape sequence");
+                     }
+                 }
+                 b'u' => {
+                     if let Some(value) = self.match_hex_digits(4) {
+                         let mut utf16 = vec![value];
+                         if 0xd800 == (0xfc00 & value) {
+                             // High surrogate value. Look for the low surrogate value.
+                             if self.match_char(b'\\') && self.match_char(b'u') {
+                                 if let Some(lo) = self.match_hex_digits(4) {
+                                     if 0xdc00 == (0xfc00 & lo) {
+                                         // Found a valid low surrogate.
+                                         utf16.push(lo);
+                                     } else {
+                                         return Token::Error(
+                                             "invalid low surrogate value after high surrogate");
+                                     }
+                                 }
+                             }
+                             if utf16.len() != 2 {
+                                 return Token::Error(
+                                     "expected low surrogate after high surrogate");
+                             }
+                         } else if 0xdc00 == (0xfc00 & value) {
+                             // Unaccompanied low surrogate value. This must be
+                             // rejected here: it isn't valid UTF-16 on its own,
+                             // so the from_utf16() conversion below would fail
+                             // and the unwrap() would panic.
+                             return Token::Error(
+                                 "expected high surrogate before low surrogate");
+                         } else if value == 0 {
+                             return Token::Error("\\u0000 is not allowed");
+                         }
+
+                         // Insert the UTF-16 sequence as UTF-8. The checks
+                         // above guarantee that utf16 is either one
+                         // non-surrogate unit or a high/low surrogate pair,
+                         // so the conversion cannot fail.
+                         let utf8 = String::from_utf16(&utf16).unwrap();
+                         str_buf.extend(utf8.as_bytes());
+                     } else {
+                         return Token::Error("malformed \\u escape sequence");
+                     }
+                     continue; // We don't want to str_buf.push(c2) below.
+                 }
+                 _ => return Token::Error("unexpected escape sequence character after '\\'")
+             }
+
+         } else if c == b'\n' {
+             self.line_num += 1;
+             c
+
+         } else if c == b'\r' {
+             self.line_num += 1;
+             if self.match_char(b'\n') {
+                 // Keep both chars of a "\r\n" pair, counting it as a single
+                 // line terminator.
+                 str_buf.push(b'\r');
+                 b'\n'
+             } else {
+                 c
+             }
+
+         } else if c == EOF {
+             return Token::Error("unterminated string literal");
+
+         } else {
+             // This case is only hit for the non-closing quote char.
+             debug_assert!((c == b'\'' || c == b'\"') && c != quote_char);
+             c
+         };
+         str_buf.push(c2);
+     }
+     str_buf.push(b'\0');
+     return Token::String;
+ }
+}
new file mode 100644
--- /dev/null
+++ b/modules/libpref/test/gtest/Parser.cpp
@@ -0,0 +1,533 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "mozilla/ArrayUtils.h"
+
+// Keep this in sync with the declaration in Preferences.cpp.
+//
+// It's declared here to avoid polluting Preferences.h with test-only stuff.
+//
+// As used by the tests below: parses aText as prefs text and stores the
+// resulting error message in aErrorMsg. The tests expect an empty message for
+// valid input, and messages that name the file as "test" otherwise.
+void
+TestParseError(const char* aText, nsCString& aErrorMsg);
+
+TEST(PrefsParser, Errors)
+{
+ nsAutoCStringN<128> actualErrorMsg;
+
+// Use a macro rather than a function so that the line number reported by
+// gtest on failure is useful.
+#define P(text_, expectedErrorMsg_) \
+ do { \
+ TestParseError(text_, actualErrorMsg); \
+ ASSERT_STREQ(expectedErrorMsg_, actualErrorMsg.get()); \
+ } while (0)
+
+ // clang-format off
+
+ //-------------------------------------------------------------------------
+ // Valid syntax, just as a sanity test. (More thorough testing of valid syntax
+ // and semantics is done in modules/libpref/test/unit/test_parser.js.)
+ //-------------------------------------------------------------------------
+
+ P(R"(
+pref("bool", true);
+sticky_pref("int", 123);
+user_pref("string", "value");
+ )",
+ ""
+ );
+
+ //-------------------------------------------------------------------------
+ // All the lexing errors. (To be pedantic, some of the integer literal
+ // overflows are triggered in the parser, but put them all here so they're all
+ // in the one spot.)
+ //-------------------------------------------------------------------------
+
+ // Integer overflow errors.
+
+ P(R"(
+pref("int.ok", 2147483647);
+pref("int.overflow", 2147483648);
+ )",
+ "test:3: prefs parse error: integer literal overflowed");
+
+ P(R"(
+pref("int.ok", +2147483647);
+pref("int.overflow", +2147483648);
+ )",
+ "test:3: prefs parse error: integer literal overflowed"
+ );
+
+ P(R"(
+pref("int.ok", -2147483648);
+pref("int.overflow", -2147483649);
+ )",
+ "test:3: prefs parse error: integer literal overflowed"
+ );
+
+ P(R"(
+pref("int.overflow", 4294967296);
+ )",
+ "test:2: prefs parse error: integer literal overflowed"
+ );
+
+ P(R"(
+pref("int.overflow", +4294967296);
+ )",
+ "test:2: prefs parse error: integer literal overflowed"
+ );
+
+ P(R"(
+pref("int.overflow", -4294967296);
+ )",
+ "test:2: prefs parse error: integer literal overflowed"
+ );
+
+ P(R"(
+pref("int.overflow", 4294967297);
+ )",
+ "test:2: prefs parse error: integer literal overflowed"
+ );
+
+ P(R"(
+pref("int.overflow", 1234567890987654321);
+ )",
+ "test:2: prefs parse error: integer literal overflowed"
+ );
+
+ // Other integer errors.
+
+ P(R"(
+pref("int.unexpected", 100foo);
+ )",
+ "test:2: prefs parse error: unexpected character in integer literal"
+ );
+
+ // \x escape errors.
+
+ // \x00 is not allowed.
+ P(R"(
+pref("string.bad-x-escape", "foo\x00bar");
+ )",
+ "test:2: prefs parse error: \\x00 is not allowed"
+ );
+
+ // End of string after \x.
+ P(R"(
+pref("string.bad-x-escape", "foo\x");
+ )",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // Punctuation after \x.
+ P(R"(
+pref("string.bad-x-escape", "foo\x,bar");
+ )",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // Space after \x.
+ P(R"(
+pref("string.bad-x-escape", "foo\x 12");
+ )",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // Newline after \x.
+ P(R"(
+pref("string.bad-x-escape", "foo\x
+12");
+ )",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // EOF after \x.
+ P(R"(
+pref("string.bad-x-escape", "foo\x)",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // Not enough hex digits.
+ P(R"(
+pref("string.bad-x-escape", "foo\x1");
+ )",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // Invalid hex digit.
+ P(R"(
+pref("string.bad-x-escape", "foo\x1G");
+ )",
+ "test:2: prefs parse error: malformed \\x escape sequence"
+ );
+
+ // \\u escape errors.
+
+ // \\u0000 is not allowed.
+ P(R"(
+pref("string.bad-u-escape", "foo\u0000 bar");
+ )",
+ "test:2: prefs parse error: \\u0000 is not allowed"
+ );
+
+ // End of string after \u.
+ P(R"(
+pref("string.bad-u-escape", "foo\u");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Punctuation after \u.
+ P(R"(
+pref("string.bad-u-escape", "foo\u,bar");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Space after \u.
+ P(R"(
+pref("string.bad-u-escape", "foo\u 1234");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Newline after \u.
+ P(R"(
+pref("string.bad-u-escape", "foo\u
+1234");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // EOF after \u.
+ P(R"(
+pref("string.bad-u-escape", "foo\u)",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Not enough hex digits.
+ P(R"(
+pref("string.bad-u-escape", "foo\u1");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Not enough hex digits.
+ P(R"(
+pref("string.bad-u-escape", "foo\u12");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Not enough hex digits.
+ P(R"(
+pref("string.bad-u-escape", "foo\u123");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // Invalid hex digit.
+ P(R"(
+pref("string.bad-u-escape", "foo\u1G34");
+ )",
+ "test:2: prefs parse error: malformed \\u escape sequence"
+ );
+
+ // High surrogate not followed by low surrogate.
+ P(R"(
+pref("string.bad-u-surrogate", "foo\ud83c,blah");
+ )",
+ "test:2: prefs parse error: expected low surrogate after high surrogate"
+ );
+
+ // High surrogate followed by invalid low surrogate value.
+ P(R"(
+pref("string.bad-u-surrogate", "foo\ud83c\u1234");
+ )",
+ "test:2: prefs parse error: invalid low surrogate value after high surrogate"
+ );
+
+ // Bad escape characters.
+
+ // Unlike in JavaScript, \b isn't allowed.
+ P(R"(
+pref("string.bad-escape", "foo\v");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Unlike in JavaScript, \f isn't allowed.
+ P(R"(
+pref("string.bad-escape", "foo\f");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Unlike in JavaScript, \t isn't allowed.
+ P(R"(
+pref("string.bad-escape", "foo\t");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Unlike in JavaScript, \v isn't allowed.
+ P(R"(
+pref("string.bad-escape", "foo\v");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Non-special letter after \.
+ P(R"(
+pref("string.bad-escape", "foo\Q");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Number after \.
+ P(R"(
+pref("string.bad-escape", "foo\1");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Punctuation after \.
+ P(R"(
+pref("string.bad-escape", "foo\,");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Space after \.
+ P(R"(
+pref("string.bad-escape", "foo\ n");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Newline after \.
+ P(R"(
+pref("string.bad-escape", "foo\
+n");
+ )",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // EOF after \.
+ P(R"(
+pref("string.bad-escape", "foo\)",
+ "test:2: prefs parse error: unexpected escape sequence character after '\\'"
+ );
+
+ // Unterminated string literals.
+
+ // Simple case.
+ P(R"(
+pref("string.unterminated-string", "foo
+ )",
+ "test:3: prefs parse error: unterminated string literal"
+ );
+
+ // Mismatched quotes (1).
+ P(R"(
+pref("string.unterminated-string", "foo');
+ )",
+ "test:3: prefs parse error: unterminated string literal"
+ );
+
+ // Mismatched quotes (2).
+ P(R"(
+pref("string.unterminated-string", 'foo");
+ )",
+ "test:3: prefs parse error: unterminated string literal"
+ );
+
+ // Unknown keywords
+
+ P(R"(
+foo
+ )",
+ "test:2: prefs parse error: unknown keyword"
+ );
+
+ P(R"(
+preff("string.bad-keyword", true);
+ )",
+ "test:2: prefs parse error: unknown keyword"
+ );
+
+ P(R"(
+ticky_pref("string.bad-keyword", true);
+ )",
+ "test:2: prefs parse error: unknown keyword"
+ );
+
+ P(R"(
+User_pref("string.bad-keyword", true);
+ )",
+ "test:2: prefs parse error: unknown keyword"
+ );
+
+ P(R"(
+pref("string.bad-keyword", TRUE);
+ )",
+ "test:2: prefs parse error: unknown keyword"
+ );
+
+ // Unterminated C-style comment
+ P(R"(
+/* comment
+ )",
+ "test:3: prefs parse error: unterminated /* comment"
+ );
+
+ // Malformed comments.
+
+ P(R"(
+/ comment
+ )",
+ "test:2: prefs parse error: expected '/' or '*' after '/'"
+ );
+
+ // Unexpected characters
+
+ P(R"(
+pref("unexpected.chars", &true);
+ )",
+ "test:2: prefs parse error: unexpected character"
+ );
+
+ P(R"(
+pref("unexpected.chars" : true);
+ )",
+ "test:2: prefs parse error: unexpected character"
+ );
+
+ P(R"(
+@pref("unexpected.chars", true);
+ )",
+ "test:2: prefs parse error: unexpected character"
+ );
+
+ P(R"(
+pref["unexpected.chars": true];
+ )",
+ "test:2: prefs parse error: unexpected character"
+ );
+
+ //-------------------------------------------------------------------------
+ // All the parsing errors.
+ //-------------------------------------------------------------------------
+
+ P(R"(
+"pref"("parse.error": true);
+ )",
+ "test:2: prefs parse error: expected pref specifier at start of pref definition"
+ );
+
+ P(R"(
+pref1("parse.error": true);
+ )",
+ "test:2: prefs parse error: expected '(' after pref specifier"
+ );
+
+ P(R"(
+pref(123: true);
+ )",
+ "test:2: prefs parse error: expected pref name after '('"
+ );
+
+ P(R"(
+pref("parse.error" true);
+ )",
+ "test:2: prefs parse error: expected ',' after pref name"
+ );
+
+ P(R"(
+pref("parse.error", -true);
+ )",
+ "test:2: prefs parse error: expected integer literal after '-'"
+ );
+
+ P(R"(
+pref("parse.error", +"value");
+ )",
+ "test:2: prefs parse error: expected integer literal after '+'"
+ );
+
+ P(R"(
+pref("parse.error", pref);
+ )",
+ "test:2: prefs parse error: expected pref value after ','"
+ );
+
+ P(R"(
+pref("parse.error", true;
+ )",
+ "test:2: prefs parse error: expected ')' after pref value"
+ );
+
+ P(R"(
+pref("parse.error", true)
+pref("parse.error", true)
+ )",
+ "test:3: prefs parse error: expected ';' after ')'"
+ );
+
+ // This is something we saw in practice with the old parser, which allowed
+ // repeated semicolons.
+ P(R"(
+pref("parse.error", true);;
+ )",
+ "test:2: prefs parse error: expected pref specifier at start of pref definition"
+ );
+
+ //-------------------------------------------------------------------------
+ // Invalid syntax after various newline combinations, for the purpose of
+ // testing that line numbers are correct.
+ //-------------------------------------------------------------------------
+
+ // In all of the following we have a \n, a \r, a \r\n, and then an error, so
+ // the error is on line 4.
+
+ P(R"(
+
+bad
+ )",
+ "test:4: prefs parse error: unknown keyword"
+ );
+
+ P(R"(#
+#
#
+bad
+ )",
+ "test:4: prefs parse error: unknown keyword"
+ );
+
+ P(R"(//
+//
//
+bad
+ )",
+ "test:4: prefs parse error: unknown keyword"
+ );
+
+ P(R"(/*
+
+*/ bad
+ )",
+ "test:4: prefs parse error: unknown keyword"
+ );
+
+ // Note: the escape sequences do *not* affect the line number.
+ P(R"(pref("foo\n
+\r
foo\r\n
+foo", bad
+ )",
+ "test:4: prefs parse error: unknown keyword"
+ );
+
+ // clang-format on
+}