Bug 1349917 - Pseudo-implementation of lazy parsing of functions draft
authorDavid Teller <dteller@mozilla.com>
Tue, 25 Apr 2017 15:56:02 +0200
changeset 567764 577f734dc70ab25065edce91e4a6d0a402c958f0
parent 567763 6e4b9a97f7b7e13e2c8383ed1efc0891456c90c6
child 569947 b73d0a187da4153b5260e8763cdbf98e04ecdf3f
push id55689
push userdteller@mozilla.com
push dateTue, 25 Apr 2017 14:21:34 +0000
bugs1349917
milestone55.0a1
Bug 1349917 - Pseudo-implementation of lazy parsing of functions We don't really have the data structures to store lazy parsing data, so this patch just heap-allocates some data to represent the cost of storing this lazy parsing data. With this patch, we get the following speedups on Facebook for the AST creation phase (which takes ~700 ms from source): * no skipping: *0.31 * skipping all nested functions: *0.12 * skipping all functions: *0.04 All measures are done in optimized build. MozReview-Commit-ID: 6mR0x36l5EW
js/src/frontend/BinaryAST.cpp
js/src/frontend/BinaryAST.h
js/src/jsobj.h
js/src/shell/js.cpp
--- a/js/src/frontend/BinaryAST.cpp
+++ b/js/src/frontend/BinaryAST.cpp
@@ -4,20 +4,22 @@
 #include <iostream>
 #include <istream>
 #include <sstream>
 #include <strstream>
 #include <unordered_map>
 #include <vector>
 
 #include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
 #include "frontend/BinaryAST.h"
 #include "frontend/ParseNode.h"
 #include "jsatom.h"
 #include "jsopcode.h"
+#include "SharedContext.h"
 
 namespace js {
 namespace frontend {
 
 // #define DEBUG_HEADER 1
 
 using VariantKind = StringTree::VariantKind;
 using StringTable = std::unordered_map<std::string, uint32_t>;
@@ -261,23 +263,25 @@ public:
             totalLength += iter;
         }
 
         // std::cerr << "treeSerialize: writing " << lengths.size() << " strings, for a total length of " << totalLength << "\n";
 
         // Now write strings in order.
         // To extract a string, you first need to have read the lengths table.
         MOZ_ASSERT(heap.size() == lengths.size());
-        const size_t startStrings = out.tellp();
+        const mozilla::DebugOnly<std::ostringstream::pos_type> startStrings = out.tellp();
         for (auto iter: heap) {
             out.write(&iter.first[0], iter.first.length());
         }
 
         MOZ_ASSERT((size_t)out.tellp() == (size_t)(startStrings + totalLength));
 
+        // std::cerr << "treeSerialize: body starts at " << out.tellp() << "\n";
+
         // Finally, write tree
         write(tree.get(), out);
 
         return true;
     }
 
 private:
     void computeOccurrences(StringTree* tree, std::unordered_map<std::string, uint32_t>& occurrences) {
@@ -295,17 +299,17 @@ private:
         for (const UniquePtr<StringTree>& tree: concat.children) {
             computeOccurrences(tree.get(), occurrences);
         }
     }
 
     MOZ_MUST_USE StringTree* treeSerializeAux(const ParseNode* node)
     {
         const ParseNodeKind kind = node ? node->getKind() : PNK_LIMIT;
-    //    // std::cerr << "treeSerialize " << NAMES[kind] << "\n";
+        // std::cerr << "treeSerialize " << NAMES[kind] << "\n";
         debug << NAMES[kind] << " ";
 
         switch (kind) {
             // Empty tree
             case PNK_LIMIT: {
                 return StringTree::makeLeaf(cx, PNK_LIMIT);
             }
             // Body
@@ -603,51 +607,52 @@ public:
                     write(tree.get(), sub_out);
                 }
 
                 const std::string sub_str = sub_out.str();
                 const size_t byteLength = sub_str.length();
                 writeUnsigned(out, byteLength);
                 MOZ_ASSERT(byteLength != 0);
 
-                const size_t byteStart = out.tellp();
+                const mozilla::DebugOnly<std::ostringstream::pos_type> byteStart = out.tellp();
                 out << sub_str;
-                const size_t byteStop = out.tellp();
+                const mozilla::DebugOnly<std::ostringstream::pos_type> byteStop = out.tellp();
                 // std::cerr << "StringTree::write length at " << byteStart << "\n";
                 MOZ_ASSERT(byteLength == byteStop - byteStart);
             }
         }
 
 
     #if DEBUG_HEADER
         out.write(FOOTER, sizeof FOOTER - 1);
     #endif // DEBUG_HEADER
     }
 };
 
 class TreeParser MOZ_RAII {
 public:
-    TreeParser(JSContext* cx_, ParseNodeAllocator& alloc, std::istringstream& in_, std::ostringstream& debug_)
+    TreeParser(JSContext* cx_, ParseNodeAllocator& alloc, std::istringstream& in_, std::ostringstream& debug_,
+        BinParseOptions& options_)
         : cx(cx_)
         , in(in_)
         , debug(debug_)
         , functionDepth(0)
         , numberOfNodes(0)
         , currentByteLength((size_t)-1)
         , currentByteStart((size_t)-1)
         , currentKind((ParseNodeKind)-1)
         , currentVariant((VariantKind)-1)
         , allocator(alloc)
+        , options(options_)
     {
         // std::cerr << "TreeParser starts with offset " << in.tellg() << "\n";
     }
     ~TreeParser() {
         MOZ_ASSERT(in.peek() == std::char_traits<char>::eof());
         MOZ_ASSERT(functionDepth == 0);
-        // std::cerr << "TreeParser complete after " << numberOfNodes << " nodes\n";
     }
     ParseNode* parseNode() {
         // Read table of lengths of strings
         const uint32_t numberOfStrings = readVariableLength();
         if (!lengthsTable.initCapacity(numberOfStrings)) {
             MOZ_CRASH();
         }
         for (uint32_t i = 0; i < numberOfStrings; ++i) {
@@ -664,16 +669,18 @@ public:
             std::string data;
             data.reserve(iter);
             in.read(&data[0], iter);
             if (!stringsTable.append(Move(data))) {
                 MOZ_CRASH();
             }
         }
 
+        globalBodyStart = in.tellg();
+        // std::cerr << "ParseNode: body starts at " << globalBodyStart << "\n";
         return parseNodeAux();
     }
 
 private:
 
     // Parse a subtree as a string. The subtree must have been serialized as a string.
     // FIXME: Crappy documentation suggests crappy concepts. Clean this up.
     uint32_t parseLeaf(ParseNodeKind& kind)
@@ -750,16 +757,28 @@ private:
 #else
         currentVariant = VariantKind::IsLeaf;
 #endif
         currentByteStart = -1; // Ignored for leaves.
 
         return readVariableLength();
     }
 
+    void skipConcatNode() {
+        MOZ_ASSERT(currentKind == PNK_FUNCTION);
+#if DEBUG_HEADER
+        MOZ_ASSERT(currentVariant == VariantKind::IsConcat);
+#else
+        currentVariant = VariantKind::IsConcat;
+#endif
+        currentByteLength = readVariableLength();
+        currentByteStart = in.tellg();
+        in.seekg(currentByteLength, in.cur);
+    }
+
     size_t readNodeAsConcat() {
         MOZ_ASSERT(currentKind != PNK_LIMIT);
 #if DEBUG_HEADER
         MOZ_ASSERT(currentVariant == VariantKind::IsConcat);
 #else
         currentVariant = VariantKind::IsConcat;
 #endif
 
@@ -781,23 +800,27 @@ private:
 private:
     JSContext* cx;
     std::istringstream& in;
     std::ostringstream& debug;
     size_t functionDepth;
     size_t numberOfNodes;
     size_t currentByteLength;
     size_t currentByteStart;
+    size_t globalBodyStart;
     ParseNodeKind currentKind;
     VariantKind currentVariant;
     mozilla::Vector<uint32_t> lengthsTable;
     mozilla::Vector<std::string> stringsTable;
+    ParseNodeAllocator& allocator;
+    BinParseOptions options;
 
     JS_DECLARE_NEW_METHODS(new_, allocParseNode, inline)
-    ParseNodeAllocator& allocator;
+
+
     ParseNode* allocParseNode(size_t size) {
         MOZ_ASSERT(size == sizeof(ParseNode));
         return static_cast<ParseNode*>(allocator.allocNode());
     }
 
     void handleHeader()
     {
         ++numberOfNodes;
@@ -832,19 +855,19 @@ private:
 #endif // DEBUG_HEADER
         }
     }
 
     void handleFooter() {
         // std::cerr << "handleFooter " << in.tellg() << "\n";
 
         // Check byteLength, footer
-
+        MOZ_ASSERT(currentVariant == VariantKind::IsConcat || currentVariant == VariantKind::IsLeaf);
         if (currentVariant == VariantKind::IsConcat) {
-            const size_t byteStop = in.tellg();
+            const mozilla::DebugOnly<std::istringstream::pos_type> byteStop = in.tellg();
             MOZ_ASSERT(byteStop - currentByteStart == currentByteLength);
         }
 
 #if DEBUG_HEADER
         std::string footer(sizeof FOOTER - 1, '\0');
         in.read(&footer[0], sizeof FOOTER - 1);
         if (footer.compare(0, sizeof FOOTER - 1, FOOTER) != 0) {
             MOZ_CRASH("Bad footer");
@@ -876,57 +899,74 @@ private:
     void getLeafAsString(uint32_t id, std::string& data)
     {
         data = stringsTable[id];
     }
 
     MOZ_MUST_USE ParseNode* parseNode(ParseNodeKind kind)
     {
         debug << NAMES[kind] << " ";
-        // std::cerr << "parseNode " << NAMES[kind] << "\n";
 
         switch (kind) {
             // Empty subtree.
             case PNK_LIMIT: {
                 return nullptr;
             }
 
             case PNK_LEXICALSCOPE: {
-                const size_t length = readNodeAsConcat();
+                const mozilla::DebugOnly<size_t> length = readNodeAsConcat();
                 MOZ_ASSERT(length == 1);
 
                 UniquePtr<ParseNode> body(parseNodeAux());
                 MOZ_ASSERT(body);
 
                 return new_<LexicalScopeNode>(nullptr, body.release());
             }
 
             case PNK_FUNCTION: {
+                // Allocate a value to simulate the cost of really storing the
+                // data that would let us parse lazily.
+                struct BogusFunctionData {
+                    size_t codeStart;
+                    size_t byteLength;
+                };
+
+                if (functionDepth == options.lazyParseAfterFunctionDepth) {
+                    UniquePtr<CodeNode> result(new_<CodeNode>(kind, JSOP_NOP, TokenPos()));
+                    UniquePtr<BogusFunctionData> position(new BogusFunctionData());
+                    position->codeStart = in.tellg();
+                    skipConcatNode();
+                    position->byteLength = currentByteLength;
+
+                    return result.release();
+                }
+
                 ++functionDepth;
-                const size_t length = readNodeAsConcat();
+                const mozilla::DebugOnly<size_t> length = readNodeAsConcat();
                 MOZ_ASSERT(length == 1);
 
                 UniquePtr<ParseNode> body(parseNodeAux());
                 MOZ_ASSERT(body);
 
                 CodeNode* result(new_<CodeNode>(kind, JSOP_NOP, TokenPos()));
                 result->pn_body = body.release();
+//                result->pn_funbox = box;
                 --functionDepth;
                 return result;
             }
 
             case PNK_IF: MOZ_FALLTHROUGH;
             case PNK_IMPORT_SPEC: MOZ_FALLTHROUGH;
             case PNK_EXPORT: MOZ_FALLTHROUGH;
             case PNK_FORHEAD:  MOZ_FALLTHROUGH;
             case PNK_CONDITIONAL: MOZ_FALLTHROUGH;
             case PNK_CLASS: MOZ_FALLTHROUGH;
             case PNK_TRY: MOZ_FALLTHROUGH;
             case PNK_CATCH: {
-                const size_t length = readNodeAsConcat();
+                const mozilla::DebugOnly<size_t> length = readNodeAsConcat();
                 MOZ_ASSERT(length == 3);
 
                 UniquePtr<ParseNode> kid1(parseNodeAux());
                 UniquePtr<ParseNode> kid2(parseNodeAux());
                 UniquePtr<ParseNode> kid3(parseNodeAux());
                 return new_<TernaryNode>(kind, JSOP_NOP, kid1.release(), kid2.release(), kid3.release(), TokenPos());
             }
 
@@ -957,17 +997,17 @@ private:
             case PNK_SWITCH: MOZ_FALLTHROUGH;
             case PNK_IMPORT: MOZ_FALLTHROUGH;
             case PNK_EXPORT_SPEC: MOZ_FALLTHROUGH;
             case PNK_CLASSMETHOD: MOZ_FALLTHROUGH;
             case PNK_CLASSNAMES: MOZ_FALLTHROUGH;
             case PNK_NEWTARGET: MOZ_FALLTHROUGH;
             case PNK_FORIN: MOZ_FALLTHROUGH;
             case PNK_FOROF: {
-                const size_t length = readNodeAsConcat();
+                const mozilla::DebugOnly<size_t> length = readNodeAsConcat();
                 MOZ_ASSERT(length == 2);
 
                 UniquePtr<ParseNode> left(parseNodeAux());
                 UniquePtr<ParseNode> right(parseNodeAux());
 
                 if (kind == PNK_CASE) {
                     return new_<CaseClause>(left.release(), right.release(), 0);
                 } else if (kind == PNK_FORIN || kind == PNK_FOROF) {
@@ -1002,17 +1042,17 @@ private:
             case PNK_POSTINCREMENT: MOZ_FALLTHROUGH;
             case PNK_PREDECREMENT: MOZ_FALLTHROUGH;
             case PNK_POSTDECREMENT: MOZ_FALLTHROUGH;
             case PNK_DELETENAME: MOZ_FALLTHROUGH;
             case PNK_DELETEPROP: MOZ_FALLTHROUGH;
             case PNK_DELETEELEM: MOZ_FALLTHROUGH;
             case PNK_DELETEEXPR: MOZ_FALLTHROUGH;
             case PNK_MUTATEPROTO: {
-                const size_t length = readNodeAsConcat();
+                const mozilla::DebugOnly<size_t> length = readNodeAsConcat();
                 MOZ_ASSERT(length == 1);
 
                 return new_<UnaryNode>(kind, JSOP_NOP, TokenPos(), parseNodeAux());
             }
 
             // Linked lists
             case PNK_CLASSMETHODLIST: MOZ_FALLTHROUGH;
             case PNK_EXPORT_SPEC_LIST: MOZ_FALLTHROUGH;
@@ -1092,17 +1132,17 @@ private:
                     return new_<BreakStatement>(label, TokenPos());
                 } else {
                     return new_<ContinueStatement>(label, TokenPos());
                 }
             }
 
             case PNK_LABEL: MOZ_FALLTHROUGH;
             case PNK_DOT: {
-                const size_t length = readNodeAsConcat();
+                const mozilla::DebugOnly<size_t> length = readNodeAsConcat();
                 MOZ_ASSERT(length == 2);
 
                 ParseNodeKind subkind;
                 std::string leaf;
                 getLeafAsString(parseLeaf(subkind), leaf);
                 MOZ_ASSERT(subkind == kind);
                 RootedAtom atom(cx, deserializeAtom(cx, leaf));
 
@@ -1186,15 +1226,17 @@ private:
     }
 };
 
 
 MOZ_MUST_USE bool binSerialize(JSContext* cx, const ParseNode* node, std::ostringstream& out, std::ostringstream& debug) {
     TreeSerializer serializer(cx, debug);
     return serializer.treeSerialize(node, out);
 }
-MOZ_MUST_USE ParseNode* binParse(JSContext* cx, ParseNodeAllocator& alloc, std::istringstream& in, std::ostringstream& debug) {
-    TreeParser parser(cx, alloc, in, debug);
+MOZ_MUST_USE ParseNode* binParse(JSContext* cx, ParseNodeAllocator& alloc, std::istringstream& in,
+    std::ostringstream& debug, BinParseOptions& options
+) {
+    TreeParser parser(cx, alloc, in, debug, options);
     return parser.parseNode();
 }
 
 } // namespace frontend
 } // namespace js
--- a/js/src/frontend/BinaryAST.h
+++ b/js/src/frontend/BinaryAST.h
@@ -5,18 +5,28 @@
 #include <istream>
 #include <strstream>
 
 #include "frontend/ParseNode.h"
 
 namespace js {
 namespace frontend {
 
+struct BinParseOptions {
+    // When reaching a depth of nested functions larger than
+    // `lazyParseAfterFunctionDepth`, switch to lazy parsing.
+    uint32_t lazyParseAfterFunctionDepth;
+
+    BinParseOptions()
+        : lazyParseAfterFunctionDepth(UINT32_MAX)
+    { }
+};
+
 MOZ_MUST_USE bool binSerialize(JSContext* cx, const ParseNode* node, std::ostringstream& out, std::ostringstream& debug);
-MOZ_MUST_USE ParseNode* binParse(JSContext* cx, ParseNodeAllocator& alloc, std::istringstream& out, std::ostringstream& debug);
+MOZ_MUST_USE ParseNode* binParse(JSContext* cx, ParseNodeAllocator& alloc, std::istringstream& out, std::ostringstream& debug, BinParseOptions& options);
 
 // A fast-concatenation-of-strings data structure.
 struct StringTree {
     struct Leaf {
         const std::string data;
         Leaf(const std::string&& data);
         Leaf(); // Empty leaf
     };
--- a/js/src/jsobj.h
+++ b/js/src/jsobj.h
@@ -119,17 +119,17 @@ class JSObject : public js::gc::Cell
     static js::ObjectGroup* makeLazyGroup(JSContext* cx, js::HandleObject obj);
 
   public:
     bool isNative() const {
         return getClass()->isNative();
     }
 
     const js::Class* getClass() const {
-        return group_->clasp();
+        return group_ ? group_->clasp() : nullptr; // FIXME: Just for testing.
     }
     const JSClass* getJSClass() const {
         return Jsvalify(getClass());
     }
     bool hasClass(const js::Class* c) const {
         return getClass() == c;
     }
 
--- a/js/src/shell/js.cpp
+++ b/js/src/shell/js.cpp
@@ -4420,16 +4420,40 @@ ReadBinaryAST(JSContext* cx, unsigned ar
         return false;
     }
     if (!args[0].isObject()) {
         const char* typeName = InformalValueTypeName(args[0]);
         JS_ReportErrorASCII(cx, "expected object (typed array) to parse, got %s", typeName);
         return false;
     }
 
+    BinParseOptions options;
+
+    if (args.length() >= 2) {
+        if (args[1].isPrimitive()) {
+            JS_ReportErrorNumberASCII(cx, my_GetErrorMessage, nullptr, JSSMSG_INVALID_ARGS,
+                                      "evaluate");
+            return false;
+        }
+
+        RootedObject opts(cx, &args[1].toObject());
+
+        RootedValue val(cx);
+        if (!JS_GetProperty(cx, opts, "lazyParseAfterFunctionDepth", &val)) {
+            return false;
+        }
+        if (!val.isUndefined()) {
+            double out;
+            if (!ToNumber(cx, val, &out)) {
+                return false;
+            }
+            options.lazyParseAfterFunctionDepth = out;
+        }
+    }
+
     RootedObject obj(cx, &args[0].toObject());
     if (!JS_IsArrayBufferObject(obj)) {
         const char* typeName = InformalValueTypeName(args[0]);
         JS_ReportErrorASCII(cx, "expected typed array to parse, got %s", typeName);
         return false;
     }
 
     uint32_t buf_length = 0;
@@ -4444,17 +4468,17 @@ ReadBinaryAST(JSContext* cx, unsigned ar
     std::cerr << "ReadBinaryAST from " << buf_length << " bytes at " << (void*)buf_data << "\n";
     std::cerr << "ReadBinaryAST string: " << buf_str.length() << "\n";
 
     std::ostringstream _debug; // Ignored.
 //    in_stream.exceptions(in_stream.exceptions() | std::istringstream::badbit | std::istringstream::failbit | std::istringstream::eofbit );
 
     ParseNodeAllocator allocator(cx, cx->tempLifoAlloc());
     const mozilla::TimeStamp start = mozilla::TimeStamp::Now();
-    if (!binParse(cx, allocator, in_stream, _debug)) {
+    if (!binParse(cx, allocator, in_stream, _debug, options)) {
         return false;
     }
     const mozilla::TimeStamp stop = mozilla::TimeStamp::Now();
     std::cerr << "ReadBinaryAST duration: " << (stop-start).ToMilliseconds() << "ms\n";
     args.rval().setUndefined();
     return true;
 }
 
@@ -6630,21 +6654,22 @@ static const JSFunctionSpecWithHelp shel
 "  Parses a string, potentially throwing."),
 
     JS_FN_HELP("syntaxParse", SyntaxParse, 1, 0,
 "syntaxParse(code)",
 "  Check the syntax of a string, returning success value"),
 
     JS_FN_HELP("jsbinSerialize", CreateBinaryAST, 1, 0,
 "jsbinSerialize(source)",
-"  Compile a source string into a binary AST."),
+"  Compile a source string into a binary AST.\n"),
 
 JS_FN_HELP("jsbinParse", ReadBinaryAST, 1, 0,
-"jsbinParse(typedarray)",
-"  Read an AST from a binary AST."),
+"jsbinParse(typedarray, [options])",
+"  Read an AST from a binary AST\n"
+"    lazyParseAfterFunctionDepth: (number).\n"),
 
     JS_FN_HELP("offThreadCompileScript", OffThreadCompileScript, 1, 0,
 "offThreadCompileScript(code[, options])",
 "  Compile |code| on a helper thread. To wait for the compilation to finish\n"
 "  and run the code, call |runOffThreadScript|. If present, |options| may\n"
 "  have properties saying how the code should be compiled:\n"
 "      noScriptRval: use the no-script-rval compiler option (default: false)\n"
 "      fileName: filename for error messages and debug info\n"