Bug 1344152 - Find out how many scripts are in ASCII;r?shu draft
author: David Teller <dteller@mozilla.com>
Fri, 03 Mar 2017 22:03:20 +0100
changeset 493709 f0e0145324285529d490ae93e07fb72736728a8e
parent 493708 8f6af8eda70bbe5889404f03fda6792721126b12
child 547914 3e0450be7ae786bd9c2ecafe07a7e0b30148bfd7
push id: 47823
push user: dteller@mozilla.com
push date: Sun, 05 Mar 2017 18:08:36 +0000
reviewers: shu
bugs: 1344152
milestone: 45.8.0
Bug 1344152 - Find out how many scripts are in ASCII;r?shu MozReview-Commit-ID: GSN0nQossyq
js/src/frontend/Parser.cpp
js/src/frontend/Parser.h
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
js/src/jsfriendapi.h
js/xpconnect/src/XPCJSRuntime.cpp
toolkit/components/telemetry/Histograms.json
--- a/js/src/frontend/Parser.cpp
+++ b/js/src/frontend/Parser.cpp
@@ -663,16 +663,18 @@ Parser<ParseHandler>::Parser(ExclusiveCo
     foldConstants(foldConstants),
 #ifdef DEBUG
     checkOptionsCalled(false),
 #endif
     abortedSyntaxParse(false),
     isUnexpectedEOF_(false),
     handler(cx, *alloc, tokenStream, syntaxParser, lazyOuterFunction)
 {
+    
+
     {
         AutoLockForExclusiveAccess lock(cx);
         cx->perThreadData->addActiveCompilation();
     }
 
     // The Mozilla specific JSOPTION_EXTRA_WARNINGS option adds extra warnings
     // which are not generated if functions are parsed lazily. Note that the
     // standard "use strict" does not inhibit lazy parsing.
@@ -694,16 +696,18 @@ Parser<ParseHandler>::checkOptions()
         return false;
 
     return true;
 }
 
 template <typename ParseHandler>
 Parser<ParseHandler>::~Parser()
 {
+    
+
     MOZ_ASSERT(checkOptionsCalled);
 
     alloc.release(tempPoolMark);
 
     /*
      * The parser can allocate enormous amounts of memory for large functions.
      * Eagerly free the memory now (which otherwise won't be freed until the
      * next GC) to avoid unnecessary OOMs.
@@ -715,16 +719,18 @@ Parser<ParseHandler>::~Parser()
         context->perThreadData->removeActiveCompilation();
     }
 }
 
 template <typename ParseHandler>
 ObjectBox*
 Parser<ParseHandler>::newObjectBox(JSObject* obj)
 {
+    
+
     MOZ_ASSERT(obj);
 
     /*
      * We use JSContext.tempLifoAlloc to allocate parsed objects and place them
      * on a list in this Parser to ensure GC safety. Thus the tempLifoAlloc
      * arenas containing the entries must be alive until we are done with
      * scanning, parsing and code generation for the whole script or top-level
      * function.
@@ -775,16 +781,18 @@ FunctionBox::FunctionBox(ExclusiveContex
 template <typename ParseHandler>
 FunctionBox*
 Parser<ParseHandler>::newFunctionBox(Node fn, JSFunction* fun,
                                      ParseContext<ParseHandler>* outerpc,
                                      Directives inheritedDirectives,
                                      GeneratorKind generatorKind,
                                      JSObject* enclosingStaticScope)
 {
+    
+
     MOZ_ASSERT_IF(outerpc, enclosingStaticScope == outerpc->innermostStaticScope());
     MOZ_ASSERT(fun);
 
     /*
      * We use JSContext.tempLifoAlloc to allocate parsed objects and place them
      * on a list in this Parser to ensure GC safety. Thus the tempLifoAlloc
      * arenas containing the entries must be alive until we are done with
      * scanning, parsing and code generation for the whole script or top-level
@@ -816,16 +824,18 @@ ModuleBox::ModuleBox(ExclusiveContext* c
 {
     computeThisBinding(staticScope());
 }
 
 template <typename ParseHandler>
 ModuleBox*
 Parser<ParseHandler>::newModuleBox(Node pn, HandleModuleObject module)
 {
+    
+
     MOZ_ASSERT(module);
 
     /*
      * We use JSContext.tempLifoAlloc to allocate parsed objects and place them
      * on a list in this Parser to ensure GC safety. Thus the tempLifoAlloc
      * arenas containing the entries must be alive until we are done with
      * scanning, parsing and code generation for the whole module.
      */
@@ -868,16 +878,17 @@ MarkParser(JSTracer* trc, AutoGCRooter* 
 /*
  * Parse a top-level JS script.
  */
 template <typename ParseHandler>
 typename ParseHandler::Node
 Parser<ParseHandler>::parse()
 {
     MOZ_ASSERT(checkOptionsCalled);
+    
 
     /*
      * Protect atoms from being collected by a GC activation, which might
      * - nest on this thread due to out of memory (the so-called "last ditch"
      *   GC attempted within js_NewGCThing), or
      * - run for any reason on another thread if this thread is suspended on
      *   an object lock before it finishes generating bytecode into a script
      *   protected from the GC by a root or a stack frame reference.
@@ -901,16 +912,17 @@ Parser<ParseHandler>::parse()
                    "script", TokenKindToDesc(tt));
             return null();
         }
         if (foldConstants) {
             if (!FoldConstants(context, &pn, this))
                 return null();
         }
     }
+
     return pn;
 }
 
 template <typename ParseHandler>
 bool
 Parser<ParseHandler>::reportBadReturn(Node pn, ParseReportKind kind,
                                       unsigned errnum, unsigned anonerrnum)
 {
@@ -988,20 +1000,40 @@ Parser<ParseHandler>::standaloneModule(H
     if (!FoldConstants(context, &pn, this))
         return null();
 
     Rooted<Bindings> bindings(context, modulebox->bindings);
     if (!modulepc.generateBindings(context, tokenStream, alloc, &bindings))
         return null();
     modulebox->bindings = bindings;
 
+    addEncodingTelemetry();
     MOZ_ASSERT(mn->pn_modulebox == modulebox);
     return mn;
 }
 
+// Report to telemetry whether the source scanned so far looked like ASCII.
+// Does nothing when maybeJSContext() yields no JSContext.
+template<typename ParseHandler>
+void
+Parser<ParseHandler>::addEncodingTelemetry()
+{
+    JSContext* cx = context->maybeJSContext();
+    if (!cx)
+        return;
+
+    // Sample is 1 when the token stream still reports ASCII, 0 otherwise.
+    const uint32_t sample = tokenStream.isASCIISoFar() ? 1 : 0;
+
+    // FIXME: Need to review that this is an appropriate use of
+    // `runtimeFromAnyThread`.
+    cx->compartment()->runtimeFromAnyThread()->addTelemetry(JS_TELEMETRY_SCRIPT_ENCODING,
+                                                             sample, "");
+}
+
 template <>
 SyntaxParseHandler::Node
 Parser<SyntaxParseHandler>::standaloneModule(HandleModuleObject module)
 {
     MOZ_ALWAYS_FALSE(abortIfSyntaxParser());
     return SyntaxParseHandler::NodeFailure;
 }
 
@@ -1042,32 +1074,35 @@ Parser<FullParseHandler>::evalBody()
     if (!body)
         return nullptr;
 
     if (!checkStatementsEOF())
         return nullptr;
 
     block->pn_expr = body;
     block->pn_pos = body->pn_pos;
+
+    addEncodingTelemetry();
     return block;
 }
 
 template <>
 ParseNode*
 Parser<FullParseHandler>::globalBody()
 {
     MOZ_ASSERT(pc->atGlobalLevel());
 
     ParseNode* body = statements(YieldIsName);
     if (!body)
         return nullptr;
 
     if (!checkStatementsEOF())
         return nullptr;
 
+    addEncodingTelemetry();
     return body;
 }
 
 template <typename ParseHandler>
 typename ParseHandler::Node
 Parser<ParseHandler>::newThisName()
 {
     Node thisName = newName(context->names().dotThis);
@@ -1191,16 +1226,17 @@ Parser<FullParseHandler>::standaloneFunc
         }
     }
 
     Rooted<Bindings> bindings(context, funbox->bindings);
     if (!funpc.generateBindings(context, tokenStream, alloc, &bindings))
         return null();
     funbox->bindings = bindings;
 
+    addEncodingTelemetry();
     return fn;
 }
 
 template <>
 bool
 Parser<FullParseHandler>::checkFunctionArguments()
 {
     /*
@@ -1312,16 +1348,18 @@ Parser<SyntaxParseHandler>::checkFunctio
     return true;
 }
 
 template <typename ParseHandler>
 typename ParseHandler::Node
 Parser<ParseHandler>::functionBody(InHandling inHandling, YieldHandling yieldHandling,
                                    FunctionSyntaxKind kind, FunctionBodyType type)
 {
+//    fprintf(stderr, "Yoric: Parser<>::functionBody()\n");
+
     MOZ_ASSERT(pc->sc->isFunctionBox());
     MOZ_ASSERT(!pc->funHasReturnExpr && !pc->funHasReturnVoid);
 
 #ifdef DEBUG
     uint32_t startYieldOffset = pc->lastYieldOffset;
 #endif
 
     Node pn;
@@ -2931,16 +2969,17 @@ Parser<FullParseHandler>::standaloneLazy
     Rooted<Bindings> bindings(context, funbox->bindings);
     if (!pc->generateBindings(context, tokenStream, alloc, &bindings))
         return null();
     funbox->bindings = bindings;
 
     if (!FoldConstants(context, &pn, this))
         return null();
 
+    addEncodingTelemetry();
     return pn;
 }
 
 template <typename ParseHandler>
 bool
 Parser<ParseHandler>::functionArgsAndBodyGeneric(InHandling inHandling,
                                                  YieldHandling yieldHandling, Node pn,
                                                  HandleFunction fun, FunctionSyntaxKind kind)
--- a/js/src/frontend/Parser.h
+++ b/js/src/frontend/Parser.h
@@ -893,16 +893,19 @@ class Parser : private JS::AutoGCRooter,
                        FunctionSyntaxKind kind = Expression);
 
     TokenPos pos() const { return tokenStream.currentToken().pos; }
 
     bool asmJS(Node list);
 
     void addTelemetry(JSCompartment::DeprecatedLanguageExtension e);
 
+    // Update telemetry on the encoding used in the script.
+    void addEncodingTelemetry();
+
     bool warnOnceAboutExprClosure();
 
     friend class LegacyCompExprTransplanter;
     friend struct BindData<ParseHandler>;
 };
 
 } /* namespace frontend */
 } /* namespace js */
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -302,17 +302,18 @@ TokenStream::TokenStream(ExclusiveContex
     prevLinebase(size_t(-1)),
     userbuf(cx, base, length, options.column),
     filename(options.filename()),
     displayURL_(nullptr),
     sourceMapURL_(nullptr),
     tokenbuf(cx),
     cx(cx),
     mutedErrors(options.mutedErrors()),
-    strictModeGetter(smg)
+    strictModeGetter(smg),
+    isASCIISoFar_(true)
 {
     // Nb: the following tables could be static, but initializing them here is
     // much easier.  Don't worry, the time to initialize them for each
     // TokenStream is trivial.  See bug 639420.
 
     // See Parser::assignExpr() for an explanation of isExprEnding[].
     memset(isExprEnding, 0, sizeof(isExprEnding));
     isExprEnding[TOK_COMMA] = 1;
@@ -1109,16 +1110,20 @@ TokenStream::getTokenInternal(TokenKind*
     }
 
     c = userbuf.getRawChar();
     MOZ_ASSERT(c != EOF);
 
     // Chars not in the range 0..127 are rare.  Getting them out of the way
     // early allows subsequent checking to be faster.
     if (MOZ_UNLIKELY(c >= 128)) {
+        if (isASCIISoFar_) {
+            fprintf(stderr, "Yoric: Not ASCII anymore %s\n", filename);
+            isASCIISoFar_ = false;
+        }
         if (IsSpaceOrBOM2(c)) {
             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                 updateLineInfoForEOL();
                 updateFlagsForEOL();
             }
 
             goto retry;
         }
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -843,16 +843,19 @@ class MOZ_STACK_CLASS TokenStream
     ExclusiveContext* context() const {
         return cx;
     }
 
     const ReadOnlyCompileOptions& options() const {
         return options_;
     }
 
+    // Reports the current value of isASCIISoFar_ (it starts out true).
+    bool isASCIISoFar() const { return isASCIISoFar_; }
+
   private:
     // This is the low-level interface to the JS source code buffer.  It just
     // gets raw chars, basically.  TokenStreams functions are layered on top
     // and do some extra stuff like converting all EOL sequences to '\n',
     // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw
     // chars" refers to the lack of EOL sequence normalization.)
     //
     // buf[0..length-1] often represents a substring of some larger source,
@@ -1027,16 +1030,17 @@ class MOZ_STACK_CLASS TokenStream
     const char*         filename;          // input filename or null
     mozilla::UniquePtr<char16_t[], JS::FreePolicy> displayURL_; // the user's requested source URL or null
     mozilla::UniquePtr<char16_t[], JS::FreePolicy> sourceMapURL_; // source map's filename or null
     CharBuffer          tokenbuf;           // current token string buffer
     uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
     ExclusiveContext*   const cx;
     bool                mutedErrors;
     StrictModeGetter*   strictModeGetter;  // used to test for strict mode
+    bool                isASCIISoFar_;     // `true` if we have only encountered 7-bit chars so far.
 };
 
 // Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
 // message have const char16_t* type, not const char*.
 #define JSREPORT_UC 0x100
 
 extern const char*
 TokenKindToDesc(TokenKind tt);
--- a/js/src/jsfriendapi.h
+++ b/js/src/jsfriendapi.h
@@ -125,17 +125,18 @@ enum {
     JS_TELEMETRY_GC_NON_INCREMENTAL,
     JS_TELEMETRY_GC_SCC_SWEEP_TOTAL_MS,
     JS_TELEMETRY_GC_SCC_SWEEP_MAX_PAUSE_MS,
     JS_TELEMETRY_GC_MINOR_REASON,
     JS_TELEMETRY_GC_MINOR_REASON_LONG,
     JS_TELEMETRY_GC_MINOR_US,
     JS_TELEMETRY_DEPRECATED_LANGUAGE_EXTENSIONS_IN_CONTENT,
     JS_TELEMETRY_DEPRECATED_LANGUAGE_EXTENSIONS_IN_ADDONS,
-    JS_TELEMETRY_ADDON_EXCEPTIONS
+    JS_TELEMETRY_ADDON_EXCEPTIONS,
+    JS_TELEMETRY_SCRIPT_ENCODING
 };
 
 typedef void
 (*JSAccumulateTelemetryDataCallback)(int id, uint32_t sample, const char* key);
 
 extern JS_FRIEND_API(void)
 JS_SetAccumulateTelemetryCallback(JSRuntime* rt, JSAccumulateTelemetryDataCallback callback);
 
--- a/js/xpconnect/src/XPCJSRuntime.cpp
+++ b/js/xpconnect/src/XPCJSRuntime.cpp
@@ -3164,16 +3164,19 @@ AccumulateTelemetryCallback(int id, uint
         Telemetry::Accumulate(Telemetry::JS_DEPRECATED_LANGUAGE_EXTENSIONS_IN_CONTENT, sample);
         break;
       case JS_TELEMETRY_DEPRECATED_LANGUAGE_EXTENSIONS_IN_ADDONS:
         Telemetry::Accumulate(Telemetry::JS_DEPRECATED_LANGUAGE_EXTENSIONS_IN_ADDONS, sample);
         break;
       case JS_TELEMETRY_ADDON_EXCEPTIONS:
         Telemetry::Accumulate(Telemetry::JS_TELEMETRY_ADDON_EXCEPTIONS, nsDependentCString(key), sample);
         break;
+    case JS_TELEMETRY_SCRIPT_ENCODING:
+        Telemetry::Accumulate(Telemetry::JS_SCRIPT_ENCODING, sample);
+        break;
       default:
         MOZ_ASSERT_UNREACHABLE("Unexpected JS_TELEMETRY id");
     }
 }
 
 static void
 CompartmentNameCallback(JSRuntime* rt, JSCompartment* comp,
                         char* buf, size_t bufsize)
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -5391,16 +5391,24 @@
     "description": "Security UI Telemetry"
   },
   "JS_TELEMETRY_ADDON_EXCEPTIONS" : {
     "expires_in_version" : "never",
     "kind": "count",
     "keyed" : true,
     "description" : "Exceptions thrown by add-ons"
   },
+  "JS_SCRIPT_ENCODING": {
+    "alert_emails": ["dteller@mozilla.com"],
+    "bug_numbers": [1344152],
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 10,
+    "description": "The encoding used in the source code of scripts. Unspecified=0, ASCII=1, other values reserved for future use."
+  },
   "IPC_TRANSACTION_CANCEL": {
     "alert_emails": ["billm@mozilla.com"],
     "expires_in_version": "never",
     "kind": "boolean",
     "description": "True when an IPC transaction is canceled"
   },
   "IPC_SAME_PROCESS_MESSAGE_COPY_OOM_KB": {
      "expires_in_version": "50",