Bug 1344152 - Find out how many scripts are in ASCII;r?shu
MozReview-Commit-ID: GSN0nQossyq
--- a/js/src/frontend/Parser.cpp
+++ b/js/src/frontend/Parser.cpp
@@ -663,16 +663,18 @@ Parser<ParseHandler>::Parser(ExclusiveCo
foldConstants(foldConstants),
#ifdef DEBUG
checkOptionsCalled(false),
#endif
abortedSyntaxParse(false),
isUnexpectedEOF_(false),
handler(cx, *alloc, tokenStream, syntaxParser, lazyOuterFunction)
{
+
+
{
AutoLockForExclusiveAccess lock(cx);
cx->perThreadData->addActiveCompilation();
}
// The Mozilla specific JSOPTION_EXTRA_WARNINGS option adds extra warnings
// which are not generated if functions are parsed lazily. Note that the
// standard "use strict" does not inhibit lazy parsing.
@@ -694,16 +696,18 @@ Parser<ParseHandler>::checkOptions()
return false;
return true;
}
template <typename ParseHandler>
Parser<ParseHandler>::~Parser()
{
+
+
MOZ_ASSERT(checkOptionsCalled);
alloc.release(tempPoolMark);
/*
* The parser can allocate enormous amounts of memory for large functions.
* Eagerly free the memory now (which otherwise won't be freed until the
* next GC) to avoid unnecessary OOMs.
@@ -715,16 +719,18 @@ Parser<ParseHandler>::~Parser()
context->perThreadData->removeActiveCompilation();
}
}
template <typename ParseHandler>
ObjectBox*
Parser<ParseHandler>::newObjectBox(JSObject* obj)
{
+
+
MOZ_ASSERT(obj);
/*
* We use JSContext.tempLifoAlloc to allocate parsed objects and place them
* on a list in this Parser to ensure GC safety. Thus the tempLifoAlloc
* arenas containing the entries must be alive until we are done with
* scanning, parsing and code generation for the whole script or top-level
* function.
@@ -775,16 +781,18 @@ FunctionBox::FunctionBox(ExclusiveContex
template <typename ParseHandler>
FunctionBox*
Parser<ParseHandler>::newFunctionBox(Node fn, JSFunction* fun,
ParseContext<ParseHandler>* outerpc,
Directives inheritedDirectives,
GeneratorKind generatorKind,
JSObject* enclosingStaticScope)
{
+
+
MOZ_ASSERT_IF(outerpc, enclosingStaticScope == outerpc->innermostStaticScope());
MOZ_ASSERT(fun);
/*
* We use JSContext.tempLifoAlloc to allocate parsed objects and place them
* on a list in this Parser to ensure GC safety. Thus the tempLifoAlloc
* arenas containing the entries must be alive until we are done with
* scanning, parsing and code generation for the whole script or top-level
@@ -816,16 +824,18 @@ ModuleBox::ModuleBox(ExclusiveContext* c
{
computeThisBinding(staticScope());
}
template <typename ParseHandler>
ModuleBox*
Parser<ParseHandler>::newModuleBox(Node pn, HandleModuleObject module)
{
+
+
MOZ_ASSERT(module);
/*
* We use JSContext.tempLifoAlloc to allocate parsed objects and place them
* on a list in this Parser to ensure GC safety. Thus the tempLifoAlloc
* arenas containing the entries must be alive until we are done with
* scanning, parsing and code generation for the whole module.
*/
@@ -868,16 +878,17 @@ MarkParser(JSTracer* trc, AutoGCRooter*
/*
* Parse a top-level JS script.
*/
template <typename ParseHandler>
typename ParseHandler::Node
Parser<ParseHandler>::parse()
{
MOZ_ASSERT(checkOptionsCalled);
+
/*
* Protect atoms from being collected by a GC activation, which might
* - nest on this thread due to out of memory (the so-called "last ditch"
* GC attempted within js_NewGCThing), or
* - run for any reason on another thread if this thread is suspended on
* an object lock before it finishes generating bytecode into a script
* protected from the GC by a root or a stack frame reference.
@@ -901,16 +912,17 @@ Parser<ParseHandler>::parse()
"script", TokenKindToDesc(tt));
return null();
}
if (foldConstants) {
if (!FoldConstants(context, &pn, this))
return null();
}
}
+
return pn;
}
template <typename ParseHandler>
bool
Parser<ParseHandler>::reportBadReturn(Node pn, ParseReportKind kind,
unsigned errnum, unsigned anonerrnum)
{
@@ -988,20 +1000,40 @@ Parser<ParseHandler>::standaloneModule(H
if (!FoldConstants(context, &pn, this))
return null();
Rooted<Bindings> bindings(context, modulebox->bindings);
if (!modulepc.generateBindings(context, tokenStream, alloc, &bindings))
return null();
modulebox->bindings = bindings;
+ addEncodingTelemetry();
MOZ_ASSERT(mn->pn_modulebox == modulebox);
return mn;
}
+// Report to telemetry whether the source tokenized by this parser has been
+// pure ASCII so far. Does nothing when |context| has no JSContext, i.e. when
+// maybeJSContext() returns null.
+template <typename ParseHandler>
+void
+Parser<ParseHandler>::addEncodingTelemetry()
+{
+    JSContext* cx = context->maybeJSContext();
+    if (!cx)
+        return;
+
+    // The sample is 1 when the token stream has seen only ASCII so far and 0
+    // otherwise.
+    //
+    // FIXME: Need to review that this is an appropriate use of
+    // `runtimeFromAnyThread`.
+    cx->compartment()->runtimeFromAnyThread()->addTelemetry(JS_TELEMETRY_SCRIPT_ENCODING,
+                                                            tokenStream.isASCIISoFar() ? 1 : 0,
+                                                            "");
+}
+
template <>
SyntaxParseHandler::Node
Parser<SyntaxParseHandler>::standaloneModule(HandleModuleObject module)
{
MOZ_ALWAYS_FALSE(abortIfSyntaxParser());
return SyntaxParseHandler::NodeFailure;
}
@@ -1042,32 +1074,35 @@ Parser<FullParseHandler>::evalBody()
if (!body)
return nullptr;
if (!checkStatementsEOF())
return nullptr;
block->pn_expr = body;
block->pn_pos = body->pn_pos;
+
+ addEncodingTelemetry();
return block;
}
template <>
ParseNode*
Parser<FullParseHandler>::globalBody()
{
MOZ_ASSERT(pc->atGlobalLevel());
ParseNode* body = statements(YieldIsName);
if (!body)
return nullptr;
if (!checkStatementsEOF())
return nullptr;
+ addEncodingTelemetry();
return body;
}
template <typename ParseHandler>
typename ParseHandler::Node
Parser<ParseHandler>::newThisName()
{
Node thisName = newName(context->names().dotThis);
@@ -1191,16 +1226,17 @@ Parser<FullParseHandler>::standaloneFunc
}
}
Rooted<Bindings> bindings(context, funbox->bindings);
if (!funpc.generateBindings(context, tokenStream, alloc, &bindings))
return null();
funbox->bindings = bindings;
+ addEncodingTelemetry();
return fn;
}
template <>
bool
Parser<FullParseHandler>::checkFunctionArguments()
{
/*
@@ -1312,16 +1348,18 @@ Parser<SyntaxParseHandler>::checkFunctio
return true;
}
template <typename ParseHandler>
typename ParseHandler::Node
Parser<ParseHandler>::functionBody(InHandling inHandling, YieldHandling yieldHandling,
FunctionSyntaxKind kind, FunctionBodyType type)
{
+// fprintf(stderr, "Yoric: Parser<>::functionBody()\n");
+
MOZ_ASSERT(pc->sc->isFunctionBox());
MOZ_ASSERT(!pc->funHasReturnExpr && !pc->funHasReturnVoid);
#ifdef DEBUG
uint32_t startYieldOffset = pc->lastYieldOffset;
#endif
Node pn;
@@ -2931,16 +2969,17 @@ Parser<FullParseHandler>::standaloneLazy
Rooted<Bindings> bindings(context, funbox->bindings);
if (!pc->generateBindings(context, tokenStream, alloc, &bindings))
return null();
funbox->bindings = bindings;
if (!FoldConstants(context, &pn, this))
return null();
+ addEncodingTelemetry();
return pn;
}
template <typename ParseHandler>
bool
Parser<ParseHandler>::functionArgsAndBodyGeneric(InHandling inHandling,
YieldHandling yieldHandling, Node pn,
HandleFunction fun, FunctionSyntaxKind kind)
--- a/js/src/frontend/Parser.h
+++ b/js/src/frontend/Parser.h
@@ -893,16 +893,19 @@ class Parser : private JS::AutoGCRooter,
FunctionSyntaxKind kind = Expression);
TokenPos pos() const { return tokenStream.currentToken().pos; }
bool asmJS(Node list);
void addTelemetry(JSCompartment::DeprecatedLanguageExtension e);
+ // Update telemetry on the encoding used in the script.
+ void addEncodingTelemetry();
+
bool warnOnceAboutExprClosure();
friend class LegacyCompExprTransplanter;
friend struct BindData<ParseHandler>;
};
} /* namespace frontend */
} /* namespace js */
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -302,17 +302,18 @@ TokenStream::TokenStream(ExclusiveContex
prevLinebase(size_t(-1)),
userbuf(cx, base, length, options.column),
filename(options.filename()),
displayURL_(nullptr),
sourceMapURL_(nullptr),
tokenbuf(cx),
cx(cx),
mutedErrors(options.mutedErrors()),
- strictModeGetter(smg)
+ strictModeGetter(smg),
+ isASCIISoFar_(true)
{
// Nb: the following tables could be static, but initializing them here is
// much easier. Don't worry, the time to initialize them for each
// TokenStream is trivial. See bug 639420.
// See Parser::assignExpr() for an explanation of isExprEnding[].
memset(isExprEnding, 0, sizeof(isExprEnding));
isExprEnding[TOK_COMMA] = 1;
@@ -1109,16 +1110,20 @@ TokenStream::getTokenInternal(TokenKind*
}
c = userbuf.getRawChar();
MOZ_ASSERT(c != EOF);
// Chars not in the range 0..127 are rare. Getting them out of the way
// early allows subsequent checking to be faster.
if (MOZ_UNLIKELY(c >= 128)) {
+ // Record that a char outside the range 0..127 has been read.
+ isASCIISoFar_ = false;
if (IsSpaceOrBOM2(c)) {
if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
updateLineInfoForEOL();
updateFlagsForEOL();
}
goto retry;
}
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -843,16 +843,19 @@ class MOZ_STACK_CLASS TokenStream
ExclusiveContext* context() const {
return cx;
}
const ReadOnlyCompileOptions& options() const {
return options_;
}
+ // Returns the current value of isASCIISoFar_, which starts out true and is
+ // cleared when getTokenInternal reads a char >= 128.
+ // NOTE(review): the only visible place that clears the flag is that one
+ // read in getTokenInternal; non-ASCII chars consumed by other code paths
+ // (e.g. inside string literals or comments) may not clear it -- confirm.
+ bool isASCIISoFar() const {
+ return isASCIISoFar_;
+ }
private:
// This is the low-level interface to the JS source code buffer. It just
// gets raw chars, basically. TokenStreams functions are layered on top
// and do some extra stuff like converting all EOL sequences to '\n',
// tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw
// chars" refers to the lack of EOL sequence normalization.)
//
// buf[0..length-1] often represents a substring of some larger source,
@@ -1027,16 +1030,17 @@ class MOZ_STACK_CLASS TokenStream
const char* filename; // input filename or null
mozilla::UniquePtr<char16_t[], JS::FreePolicy> displayURL_; // the user's requested source URL or null
mozilla::UniquePtr<char16_t[], JS::FreePolicy> sourceMapURL_; // source map's filename or null
CharBuffer tokenbuf; // current token string buffer
uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
ExclusiveContext* const cx;
bool mutedErrors;
StrictModeGetter* strictModeGetter; // used to test for strict mode
+ bool isASCIISoFar_; // `true` if we have only encountered 7-bit chars so far.
};
// Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
// message have const char16_t* type, not const char*.
#define JSREPORT_UC 0x100
extern const char*
TokenKindToDesc(TokenKind tt);
--- a/js/src/jsfriendapi.h
+++ b/js/src/jsfriendapi.h
@@ -125,17 +125,18 @@ enum {
JS_TELEMETRY_GC_NON_INCREMENTAL,
JS_TELEMETRY_GC_SCC_SWEEP_TOTAL_MS,
JS_TELEMETRY_GC_SCC_SWEEP_MAX_PAUSE_MS,
JS_TELEMETRY_GC_MINOR_REASON,
JS_TELEMETRY_GC_MINOR_REASON_LONG,
JS_TELEMETRY_GC_MINOR_US,
JS_TELEMETRY_DEPRECATED_LANGUAGE_EXTENSIONS_IN_CONTENT,
JS_TELEMETRY_DEPRECATED_LANGUAGE_EXTENSIONS_IN_ADDONS,
- JS_TELEMETRY_ADDON_EXCEPTIONS
+ JS_TELEMETRY_ADDON_EXCEPTIONS,
+ JS_TELEMETRY_SCRIPT_ENCODING
};
typedef void
(*JSAccumulateTelemetryDataCallback)(int id, uint32_t sample, const char* key);
extern JS_FRIEND_API(void)
JS_SetAccumulateTelemetryCallback(JSRuntime* rt, JSAccumulateTelemetryDataCallback callback);
--- a/js/xpconnect/src/XPCJSRuntime.cpp
+++ b/js/xpconnect/src/XPCJSRuntime.cpp
@@ -3164,16 +3164,19 @@ AccumulateTelemetryCallback(int id, uint
Telemetry::Accumulate(Telemetry::JS_DEPRECATED_LANGUAGE_EXTENSIONS_IN_CONTENT, sample);
break;
case JS_TELEMETRY_DEPRECATED_LANGUAGE_EXTENSIONS_IN_ADDONS:
Telemetry::Accumulate(Telemetry::JS_DEPRECATED_LANGUAGE_EXTENSIONS_IN_ADDONS, sample);
break;
case JS_TELEMETRY_ADDON_EXCEPTIONS:
Telemetry::Accumulate(Telemetry::JS_TELEMETRY_ADDON_EXCEPTIONS, nsDependentCString(key), sample);
break;
+ case JS_TELEMETRY_SCRIPT_ENCODING:
+ Telemetry::Accumulate(Telemetry::JS_SCRIPT_ENCODING, sample);
+ break;
default:
MOZ_ASSERT_UNREACHABLE("Unexpected JS_TELEMETRY id");
}
}
static void
CompartmentNameCallback(JSRuntime* rt, JSCompartment* comp,
char* buf, size_t bufsize)
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -5391,16 +5391,24 @@
"description": "Security UI Telemetry"
},
"JS_TELEMETRY_ADDON_EXCEPTIONS" : {
"expires_in_version" : "never",
"kind": "count",
"keyed" : true,
"description" : "Exceptions thrown by add-ons"
},
+ "JS_SCRIPT_ENCODING": {
+ "alert_emails": ["dteller@mozilla.com"],
+ "bug_numbers": [1344152],
+ "expires_in_version": "never",
+ "kind": "enumerated",
+ "n_values": 10,
+ "description": "The encoding used in the source code of scripts. Unspecified=0, ASCII=1, other values reserved for future use."
+ },
"IPC_TRANSACTION_CANCEL": {
"alert_emails": ["billm@mozilla.com"],
"expires_in_version": "never",
"kind": "boolean",
"description": "True when an IPC transaction is canceled"
},
"IPC_SAME_PROCESS_MESSAGE_COPY_OOM_KB": {
"expires_in_version": "50",