Bug 1464522 - Count static initializers from the crash reporter symbol files. r=froydnj
author Mike Hommey <mh+mozilla@glandium.org>
Tue, 29 May 2018 08:48:47 +0900
changeset 801272 8b88666223d6b67885dcbe9abc52395c12a2bd61
parent 800730 494342c311c07be38d7fff6c9809a0ff913bfd95
push id 111621
push user bmo:mh+mozilla@glandium.org
push date Tue, 29 May 2018 23:35:46 +0000
reviewers froydnj
bugs 1464522
milestone 62.0a1
Bug 1464522 - Count static initializers from the crash reporter symbol files. r=froydnj

The crash reporter symbol files are the easiest cross-platform way to find static initializers. While some types of static initializers (e.g. __attribute__((constructor)) functions) don't appear there in a notable way, the static initializers we care most about for tracking purposes do (static initializers from C++ globals). In fact, the count differs by only 2 from the currently reported count of 125 on a linux64 build, so this is a good enough approximation, and it allows us to easily track the count on Android, OSX and Windows builds, which we currently don't do.

The tricky part is that the symbol files live in dist/crashreporter-symbols/$lib/$fileid/$lib.sym, and $fileid is hard to figure out. There is a `fileid` tool in testing/tools, but it is a target binary, meaning it's not available on cross builds (OSX, Android). So the simplest approach is to gather the data while creating the symbol files, which unfortunately requires going through some hoops to make it happen for just the files we care about.
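For illustration only (not part of this patch), here is a minimal standalone sketch that applies the same heuristic to an already-generated Breakpad symbol file. The path argument is whatever dist/crashreporter-symbols/$lib/$fileid/$lib.sym file is at hand, and the FUNC-line patterns are the ones the patch matches in symbolstore.py:

#!/usr/bin/env python
# Sketch: count static initializers in an existing Breakpad .sym file,
# using the same FUNC-line heuristic this patch adds to symbolstore.py.
import sys


def count_static_initializers(sym_path):
    ctors = 0
    with open(sym_path) as fh:
        for line in fh:
            # Breakpad FUNC records look like:
            #   FUNC <address> <size> <param_size> <name>
            if not line.startswith('FUNC '):
                continue
            # clang/gcc emit _GLOBAL__sub_* symbols for C++ global
            # initializers; MSVC emits `dynamic initializer for '...'`.
            if '_GLOBAL__sub_' in line or "`dynamic initializer for '" in line:
                ctors += 1
    return ctors


if __name__ == '__main__':
    for path in sys.argv[1:]:
        print('%s: %d static initializers' % (path, count_static_initializers(path)))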
build/util/count_ctors.py
config/rules.mk
python/mozbuild/mozbuild/action/dumpsymbols.py
taskcluster/ci/build/linux.yml
testing/mozharness/configs/builds/releng_sub_linux_configs/64_tup.py
testing/mozharness/configs/builds/releng_sub_linux_configs/enable_count_ctors.py
testing/mozharness/mozharness/mozilla/building/buildbase.py
toolkit/crashreporter/tools/symbolstore.py
toolkit/library/Makefile.in
deleted file mode 100644
--- a/build/util/count_ctors.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/python
-import json
-
-import re
-import subprocess
-import sys
-
-
-def count_ctors(filename):
-    proc = subprocess.Popen(
-        ['readelf', '-W', '-S', filename], stdout=subprocess.PIPE)
-
-    # Some versions of ld produce both .init_array and .ctors.  So we have
-    # to check for both.
-    n_init_array_ctors = 0
-    have_init_array = False
-    n_ctors_ctors = 0
-    have_ctors = False
-
-    for line in proc.stdout:
-        f = line.split()
-        if len(f) != 11:
-            continue
-        # Don't try to int()-parse the header line for the section summaries.
-        if not re.match("\\[\\d+\\]", f[0]):
-            continue
-        section_name, contents, size, align = f[1], f[2], int(f[5], 16), int(f[10])
-        if section_name == ".ctors" and contents == "PROGBITS":
-            have_ctors = True
-            # Subtract 2 for the uintptr_t(-1) header and the null terminator.
-            n_ctors_ctors = size / align - 2
-        if section_name == ".init_array" and contents == "INIT_ARRAY":
-            have_init_array = True
-            n_init_array_ctors = size / align
-
-    if have_init_array:
-        # Even if we have .ctors, we shouldn't have any constructors in .ctors.
-        # Complain if .ctors does not look how we expect it to.
-        if have_ctors and n_ctors_ctors != 0:
-            print >>sys.stderr, "Unexpected .ctors contents for", filename
-            sys.exit(1)
-        return n_init_array_ctors
-    if have_ctors:
-        return n_ctors_ctors
-
-    # We didn't find anything; somebody switched initialization mechanisms on
-    # us, or the binary is completely busted.  Complain either way.
-    print >>sys.stderr, "Couldn't find .init_array or .ctors in", filename
-    sys.exit(1)
-
-
-if __name__ == '__main__':
-    for f in sys.argv[1:]:
-        perfherder_data = {
-            "framework": {"name": "build_metrics"},
-            "suites": [{
-                "name": "compiler_metrics",
-                "subtests": [{
-                    "name": "num_static_constructors",
-                    "value": count_ctors(f),
-                    "alertChangeType": "absolute",
-                    "alertThreshold": 3
-                }]}
-            ]
-        }
-        print("PERFHERDER_DATA: %s" % json.dumps(perfherder_data))
--- a/config/rules.mk
+++ b/config/rules.mk
@@ -775,17 +775,17 @@ ifdef ASFILES
 	$(REPORT_BUILD_VERBOSE)
 	$(AS) $(ASOUTOPTION)$@ $(ASFLAGS) $($(notdir $<)_FLAGS) $(AS_DASH_C_FLAG) $(_VPATH_SRCS)
 endif
 
 define syms_template
 syms:: $(2)
 $(2): $(1)
 ifdef MOZ_CRASHREPORTER
-	$$(call py_action,dumpsymbols,$$(abspath $$<) $$(abspath $$@))
+	$$(call py_action,dumpsymbols,$$(abspath $$<) $$(abspath $$@) $$(DUMP_SYMBOLS_FLAGS))
 endif
 endef
 
 ifndef MOZ_PROFILE_GENERATE
 ifneq (,$(filter $(DIST)/bin%,$(FINAL_TARGET)))
 DUMP_SYMS_TARGETS := $(SHARED_LIBRARY) $(PROGRAM) $(SIMPLE_PROGRAMS)
 endif
 endif
--- a/python/mozbuild/mozbuild/action/dumpsymbols.py
+++ b/python/mozbuild/mozbuild/action/dumpsymbols.py
@@ -6,17 +6,17 @@ from __future__ import absolute_import, 
 
 import argparse
 import buildconfig
 import subprocess
 import shutil
 import sys
 import os
 
-def dump_symbols(target, tracking_file):
+def dump_symbols(target, tracking_file, count_ctors=False):
     # Our tracking file, if present, will contain path(s) to the previously generated
     # symbols. Remove them in this case so we don't simply accumulate old symbols
     # during incremental builds.
     if os.path.isfile(os.path.normpath(tracking_file)):
         with open(tracking_file, 'r') as fh:
             files = fh.read().splitlines()
         dirs = set(os.path.dirname(f) for f in files)
         for d in dirs:
@@ -55,25 +55,34 @@ def dump_symbols(target, tracking_file):
 
     args = ([buildconfig.substs['PYTHON'], os.path.join(buildconfig.topsrcdir, 'toolkit',
                                                        'crashreporter', 'tools', 'symbolstore.py')] +
             sym_store_args +
             ['-s', buildconfig.topsrcdir, dump_syms_bin, os.path.join(buildconfig.topobjdir,
                                                                       'dist',
                                                                       'crashreporter-symbols'),
              os.path.abspath(target)])
+    if count_ctors:
+        args.append('--count-ctors')
     print('Running: %s' % ' '.join(args))
     out_files = subprocess.check_output(args)
     with open(tracking_file, 'w') as fh:
         fh.write(out_files)
         fh.flush()
 
 def main(argv):
-    if len(argv) != 2:
-        print("Usage: dumpsymbols.py <library or program> <tracking file>",
-              file=sys.stderr)
-        return 1
+    parser = argparse.ArgumentParser(
+        usage="Usage: dumpsymbols.py <library or program> <tracking file>")
+    parser.add_argument("--count-ctors",
+                        action="store_true", default=False,
+                        help="Count static initializers")
+    parser.add_argument("library_or_program",
+                        help="Path to library or program")
+    parser.add_argument("tracking_file",
+                        help="Tracking file")
+    args = parser.parse_args(argv)
 
-    return dump_symbols(*argv)
+    return dump_symbols(args.library_or_program, args.tracking_file,
+                        args.count_ctors)
 
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv[1:]))
--- a/taskcluster/ci/build/linux.yml
+++ b/taskcluster/ci/build/linux.yml
@@ -10,17 +10,16 @@ linux64/opt:
     worker:
         max-run-time: 36000
     run:
         using: mozharness
         actions: [get-secrets build check-test update]
         config:
             - builds/releng_base_firefox.py
             - builds/releng_base_linux_64_builds.py
-            - builds/releng_sub_linux_configs/enable_count_ctors.py
         script: "mozharness/scripts/fx_desktop_build.py"
         secrets: true
         tooltool-downloads: public
         need-xvfb: true
     toolchains:
         - linux64-clang
         - linux64-gcc
         - linux64-rust
@@ -98,17 +97,16 @@ linux64/pgo:
         max-run-time: 36000
     run:
         using: mozharness
         actions: [get-secrets build check-test update]
         options: [enable-pgo]
         config:
             - builds/releng_base_firefox.py
             - builds/releng_base_linux_64_builds.py
-            - builds/releng_sub_linux_configs/enable_count_ctors.py
         script: "mozharness/scripts/fx_desktop_build.py"
         secrets: true
         tooltool-downloads: public
         need-xvfb: true
     toolchains:
         - linux64-clang
         - linux64-gcc
         - linux64-rust
@@ -307,17 +305,16 @@ linux/opt:
         docker-image: {in-tree: debian7-i386-build}
         max-run-time: 36000
     run:
         using: mozharness
         actions: [get-secrets build check-test update]
         config:
             - builds/releng_base_firefox.py
             - builds/releng_base_linux_32_builds.py
-            - builds/releng_sub_linux_configs/enable_count_ctors.py
         script: "mozharness/scripts/fx_desktop_build.py"
         secrets: true
         tooltool-downloads: public
         need-xvfb: true
     toolchains:
         - linux64-clang
         - linux64-gcc
         - linux64-rust
@@ -366,17 +363,16 @@ linux/pgo:
         max-run-time: 36000
     run:
         using: mozharness
         actions: [get-secrets build check-test update]
         options: [enable-pgo]
         config:
             - builds/releng_base_firefox.py
             - builds/releng_base_linux_32_builds.py
-            - builds/releng_sub_linux_configs/enable_count_ctors.py
         script: "mozharness/scripts/fx_desktop_build.py"
         secrets: true
         tooltool-downloads: public
         need-xvfb: true
     toolchains:
         - linux64-clang
         - linux64-gcc
         - linux64-rust
--- a/testing/mozharness/configs/builds/releng_sub_linux_configs/64_tup.py
+++ b/testing/mozharness/configs/builds/releng_sub_linux_configs/64_tup.py
@@ -22,12 +22,11 @@ config = {
         'LD_LIBRARY_PATH': '%(abs_obj_dir)s/dist/bin',
         'TINDERBOX_OUTPUT': '1',
 
         # sccache doesn't work yet with tup due to the way the server is
         # spawned, and because the server does the file I/O
         'SCCACHE_DISABLE': '1',
     },
     'mozconfig_variant': 'tup',
-    'enable_count_ctors': False, # TODO: libxul.so needs to be linked for this to work
     'disable_package_metrics': True, # TODO: the package needs to be created for this to work
     'artifact_flag_build_variant_in_try': None,
 }
deleted file mode 100644
--- a/testing/mozharness/configs/builds/releng_sub_linux_configs/enable_count_ctors.py
+++ /dev/null
@@ -1,5 +0,0 @@
-config = {
-    # this enables counting constructors, which we only want to do on a
-    # very small number of variants (see bug 1261081)
-    'enable_count_ctors': True
-}
--- a/testing/mozharness/mozharness/mozilla/building/buildbase.py
+++ b/testing/mozharness/mozharness/mozilla/building/buildbase.py
@@ -1079,34 +1079,16 @@ or run without that action (ie: --no-{ac
         # Look at what we have checked out
         if os.path.exists(source_path):
             hg = self.query_exe('hg', return_type='list')
             revision = self.get_output_from_command(
                 hg + ['parent', '--template', '{node}'], cwd=source_path
             )
         return revision.encode('ascii', 'replace') if revision else None
 
-    def _count_ctors(self):
-        """count num of ctors and set testresults."""
-        dirs = self.query_abs_dirs()
-        python_path = os.path.join(dirs['abs_work_dir'], 'venv', 'bin',
-                                   'python')
-        abs_count_ctors_path = os.path.join(dirs['abs_src_dir'],
-                                            'build',
-                                            'util',
-                                            'count_ctors.py')
-        abs_libxul_path = os.path.join(dirs['abs_obj_dir'],
-                                       'dist',
-                                       'bin',
-                                       'libxul.so')
-
-        cmd = [python_path, abs_count_ctors_path, abs_libxul_path]
-        self.get_output_from_command(cmd, cwd=dirs['abs_src_dir'],
-                                     throw_exception=True)
-
     def _query_props_set_by_mach(self, console_output=True, error_level=FATAL):
         mach_properties_path = os.path.join(
             self.query_abs_dirs()['abs_obj_dir'], 'dist', 'mach_build_properties.json'
         )
         self.info("setting properties set by mach build. Looking in path: %s"
                   % mach_properties_path)
         if os.path.exists(mach_properties_path):
             with self.opened(mach_properties_path, error_level=error_level) as (fh, err):
@@ -1713,22 +1695,16 @@ or run without that action (ie: --no-{ac
         self.info('Collecting build metrics')
 
         if self.config.get('forced_artifact_build'):
             self.info('Skipping due to forced artifact build.')
             return
 
         c = self.config
 
-        if c.get('enable_count_ctors'):
-            self.info("counting ctors...")
-            self._count_ctors()
-        else:
-            self.info("ctors counts are disabled for this build.")
-
         # Report some important file sizes for display in treeherder
 
         perfherder_data = {
             "framework": {
                 "name": "build_metrics"
             },
             "suites": [],
         }
--- a/toolkit/crashreporter/tools/symbolstore.py
+++ b/toolkit/crashreporter/tools/symbolstore.py
@@ -487,43 +487,46 @@ class Dumper:
     # This is a no-op except on Win32
     def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
         return ""
 
     # subclasses override this if they want to support this
     def CopyDebug(self, file, debug_file, guid, code_file, code_id):
         pass
 
-    def Process(self, file_to_process):
+    def Process(self, file_to_process, count_ctors=False):
         """Process the given file."""
         if self.ShouldProcess(os.path.abspath(file_to_process)):
-            self.ProcessFile(file_to_process)
+            self.ProcessFile(file_to_process, count_ctors=count_ctors)
 
-    def ProcessFile(self, file, dsymbundle=None):
+    def ProcessFile(self, file, dsymbundle=None, count_ctors=False):
         """Dump symbols from these files into a symbol file, stored
         in the proper directory structure in  |symbol_path|; processing is performed
         asynchronously, and Finish must be called to wait for it complete and cleanup.
         All files after the first are fallbacks in case the first file does not process
         successfully; if it does, no other files will be touched."""
         print("Beginning work for file: %s" % file, file=sys.stderr)
 
         # tries to get the vcs root from the .mozconfig first - if it's not set
         # the tinderbox vcs path will be assigned further down
         vcs_root = os.environ.get('MOZ_SOURCE_REPO')
         for arch_num, arch in enumerate(self.archs):
-            self.ProcessFileWork(file, arch_num, arch, vcs_root, dsymbundle)
+            self.ProcessFileWork(file, arch_num, arch, vcs_root, dsymbundle,
+                                 count_ctors=count_ctors)
 
     def dump_syms_cmdline(self, file, arch, dsymbundle=None):
         '''
         Get the commandline used to invoke dump_syms.
         '''
         # The Mac dumper overrides this.
         return [self.dump_syms, file]
 
-    def ProcessFileWork(self, file, arch_num, arch, vcs_root, dsymbundle=None):
+    def ProcessFileWork(self, file, arch_num, arch, vcs_root, dsymbundle=None,
+                        count_ctors=False):
+        ctors = 0
         t_start = time.time()
         print("Processing file: %s" % file, file=sys.stderr)
 
         sourceFileStream = ''
         code_id, code_file = None, None
         try:
             cmd = self.dump_syms_cmdline(file, arch, dsymbundle=dsymbundle)
             print(' '.join(cmd), file=sys.stderr)
@@ -577,16 +580,26 @@ class Dumper:
                         # INFO CODE_ID code_id code_file
                         # This gives some info we can use to
                         # store binaries in the symbol store.
                         bits = line.rstrip().split(None, 3)
                         if len(bits) == 4:
                             code_id, code_file = bits[2:]
                         f.write(line)
                     else:
+                        if count_ctors and line.startswith("FUNC "):
+                            # Static initializers, as created by clang and gcc,
+                            # have symbols that start with "_GLOBAL__sub_".
+                            if '_GLOBAL__sub_' in line:
+                                ctors += 1
+                            # MSVC creates `dynamic initializer for '...'`
+                            # symbols.
+                            elif "`dynamic initializer for '" in line:
+                                ctors += 1
+
                         # pass through all other lines unchanged
                         f.write(line)
                 f.close()
                 proc.wait()
                 # we output relative paths so callers can get a list of what
                 # was generated
                 print(rel_path)
                 if self.srcsrv and vcs_root:
@@ -600,16 +613,34 @@ class Dumper:
             pass
         except Exception as e:
             print("Unexpected error: %s" % str(e), file=sys.stderr)
             raise
 
         if dsymbundle:
             shutil.rmtree(dsymbundle)
 
+        if count_ctors:
+            import json
+
+            perfherder_data = {
+                "framework": {"name": "build_metrics"},
+                "suites": [{
+                    "name": "compiler_metrics",
+                    "subtests": [{
+                        "name": "num_static_constructors",
+                        "value": ctors,
+                        "alertChangeType": "absolute",
+                        "alertThreshold": 3
+                    }]}
+                ]
+            }
+            print('PERFHERDER_DATA: %s' % json.dumps(perfherder_data),
+                  file=sys.stderr)
+
         elapsed = time.time() - t_start
         print('Finished processing %s in %.2fs' % (file, elapsed),
               file=sys.stderr)
 
 # Platform-specific subclasses.  For the most part, these just have
 # logic to determine what files to extract symbols from.
 
 def locate_pdb(path):
@@ -763,23 +794,24 @@ class Dumper_Mac(Dumper):
         """This function will allow processing of files that are
         executable, or end with the .dylib extension, and additionally
         file(1) reports as being Mach-O files.  It expects to find the file
         command in PATH."""
         if file.endswith(".dylib") or os.access(file, os.X_OK):
             return self.RunFileCommand(file).startswith("Mach-O")
         return False
 
-    def ProcessFile(self, file):
+    def ProcessFile(self, file, count_ctors=False):
         print("Starting Mac pre-processing on file: %s" % file,
               file=sys.stderr)
         dsymbundle = self.GenerateDSYM(file)
         if dsymbundle:
             # kick off new jobs per-arch with our new list of files
-            Dumper.ProcessFile(self, file, dsymbundle=dsymbundle)
+            Dumper.ProcessFile(self, file, dsymbundle=dsymbundle,
+                               count_ctors=count_ctors)
 
     def dump_syms_cmdline(self, file, arch, dsymbundle=None):
         '''
         Get the commandline used to invoke dump_syms.
         '''
         # dump_syms wants the path to the original binary and the .dSYM
         # in order to dump all the symbols.
         if dsymbundle:
@@ -858,16 +890,19 @@ def main():
                       help="Add source index information to debug files, making them suitable for use in a source server.")
     parser.add_option("--install-manifest",
                       action="append", dest="install_manifests",
                       default=[],
                       help="""Use this install manifest to map filenames back
 to canonical locations in the source repository. Specify
 <install manifest filename>,<install destination> as a comma-separated pair.
 """)
+    parser.add_option("--count-ctors",
+                      action="store_true", dest="count_ctors", default=False,
+                      help="Count static initializers")
     (options, args) = parser.parse_args()
 
     #check to see if the pdbstr.exe exists
     if options.srcsrv:
         pdbstr = os.environ.get("PDBSTR_PATH")
         if not os.path.exists(pdbstr):
             print("Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n", file=sys.stderr)
             sys.exit(1)
@@ -892,13 +927,13 @@ to canonical locations in the source rep
                                        archs=options.archs,
                                        srcdirs=options.srcdir,
                                        vcsinfo=options.vcsinfo,
                                        srcsrv=options.srcsrv,
                                        generated_files=generated_files,
                                        s3_bucket=bucket,
                                        file_mapping=file_mapping)
 
-    dumper.Process(args[2])
+    dumper.Process(args[2], options.count_ctors)
 
 # run main if run directly
 if __name__ == "__main__":
     main()
--- a/toolkit/library/Makefile.in
+++ b/toolkit/library/Makefile.in
@@ -6,8 +6,10 @@ include $(topsrcdir)/toolkit/library/lib
 
 include $(topsrcdir)/config/config.mk
 
 include $(topsrcdir)/config/rules.mk
 
 .PHONY: gtestxul
 gtestxul:
 	$(MAKE) -C $(DEPTH) toolkit/library/gtest/target LINK_GTEST_DURING_COMPILE=1
+
+DUMP_SYMBOLS_FLAGS = --count-ctors
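
With DUMP_SYMBOLS_FLAGS set for libxul, the resulting count shows up in the build log as a PERFHERDER_DATA line. As a rough illustration (again not part of the patch; the log path is a hypothetical argument), the value could be pulled back out of such a log like this:

# Sketch: extract num_static_constructors from a build log that contains
# the PERFHERDER_DATA line emitted by symbolstore.py above.
import json
import sys


def extract_ctor_count(log_path):
    marker = 'PERFHERDER_DATA: '
    with open(log_path) as fh:
        for line in fh:
            if marker not in line:
                continue
            data = json.loads(line.split(marker, 1)[1])
            for suite in data.get('suites', []):
                for subtest in suite.get('subtests', []):
                    if subtest.get('name') == 'num_static_constructors':
                        return subtest['value']
    return None


if __name__ == '__main__':
    print(extract_ctor_count(sys.argv[1]))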