bug 1259832 - replace generated source file names in symbol files. r=chmanchester draft
author: Ted Mielczarek <ted@mielczarek.org>
Wed, 09 Aug 2017 14:36:52 -0400
changeset 648518 768e25dea442661ec79727d45895bce9fed86a47
parent 648517 c494bebe332e8c9fe1df7e25b6c23bbef20be289
child 726842 8d44f05cedf7e3aed75cb7f09c1343ced1d0db8e
push id: 74775
push user: bmo:ted@mielczarek.org
push date: Thu, 17 Aug 2017 21:15:41 +0000
reviewers: chmanchester
bugs: 1259832
milestone: 57.0a1
bug 1259832 - replace generated source file names in symbol files. r=chmanchester

Now that builds are uploading generated source files to an S3 bucket, symbolstore.py can alter the FILE lines in symbol files to record the URLs where those generated source files can be found. We currently record files from the hg repository as `hg:<repo>:<path>:<revision>`, so here we record generated files as `s3:<bucket>:<path>:` and expect that Socorro will map that to the S3 bucket in a sensible way.

This patch does not change source server indexing, which allows Microsoft debuggers to fetch source files for a build. That will be handled in a followup.

MozReview-Commit-ID: 1g14smF0fo8
toolkit/crashreporter/tools/symbolstore.py
toolkit/crashreporter/tools/unit-symbolstore.py
--- a/toolkit/crashreporter/tools/symbolstore.py
+++ b/toolkit/crashreporter/tools/symbolstore.py
@@ -36,16 +36,22 @@ import subprocess
 import time
 import ctypes
 import urlparse
 import concurrent.futures
 import multiprocessing
 
 from optparse import OptionParser
 
+from mozbuild.util import memoize
+from mozbuild.generated_sources import (
+    get_filename_with_digest,
+    get_generated_sources,
+    get_s3_region_and_bucket,
+)
 from mozpack.copier import FileRegistry
 from mozpack.manifests import (
     InstallManifest,
     UnreadableInstallManifest,
 )
 
 # Utility classes
 
@@ -326,16 +332,25 @@ def make_file_mapping(install_manifests)
         reg = FileRegistry()
         manifest.populate_registry(reg)
         for dst, src in reg:
             if hasattr(src, 'path'):
                 abs_dest = os.path.normpath(os.path.join(destination, dst))
                 file_mapping[abs_dest] = src.path
     return file_mapping
 
+@memoize
+def get_generated_file_s3_path(filename, rel_path, bucket):
+    """Given a filename, return a path formatted similarly to
+    GetVCSFilename but representing a file available in an s3 bucket."""
+    with open(filename, 'rb') as f:
+        path = get_filename_with_digest(rel_path, f.read())
+        return 's3:{bucket}:{path}:'.format(bucket=bucket, path=path)
+
+
 def GetPlatformSpecificDumper(**kwargs):
     """This function simply returns a instance of a subclass of Dumper
     that is appropriate for the current platform."""
     return {'WINNT': Dumper_Win32,
             'Linux': Dumper_Linux,
             'Darwin': Dumper_Mac}[buildconfig.substs['OS_ARCH']](**kwargs)
 
 def SourceIndex(fileStream, outputPath, vcs_root):
@@ -371,29 +386,33 @@ class Dumper:
     srcdirRepoInfo = {}
 
     def __init__(self, dump_syms, symbol_path,
                  archs=None,
                  srcdirs=[],
                  copy_debug=False,
                  vcsinfo=False,
                  srcsrv=False,
+                 generated_files=None,
+                 s3_bucket=None,
                  file_mapping=None):
         # popen likes absolute paths, at least on windows
         self.dump_syms = os.path.abspath(dump_syms)
         self.symbol_path = symbol_path
         if archs is None:
             # makes the loop logic simpler
             self.archs = ['']
         else:
             self.archs = ['-a %s' % a for a in archs.split()]
         self.srcdirs = [os.path.normpath(self.FixFilenameCase(a)) for a in srcdirs]
         self.copy_debug = copy_debug
         self.vcsinfo = vcsinfo
         self.srcsrv = srcsrv
+        self.generated_files = generated_files or {}
+        self.s3_bucket = s3_bucket
         self.file_mapping = file_mapping or {}
         # Add a static mapping for Rust sources.
         target_os = buildconfig.substs['OS_ARCH']
         rust_srcdir = None
         if target_os == 'WINNT':
             rust_srcdir = 'C:/projects/rust/'
         elif target_os == 'Darwin':
             rust_srcdir = '/Users/travis/build/rust-lang/rust/'
@@ -491,17 +510,22 @@ class Dumper:
                         # FILE index filename
                         (x, index, filename) = line.rstrip().split(None, 2)
                         filename = os.path.normpath(self.FixFilenameCase(filename))
                         # We want original file paths for the source server.
                         sourcepath = filename
                         if filename in self.file_mapping:
                             filename = self.file_mapping[filename]
                         if self.vcsinfo:
-                            (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
+                            gen_path = self.generated_files.get(filename)
+                            if gen_path and self.s3_bucket:
+                                filename = get_generated_file_s3_path(filename, gen_path, self.s3_bucket)
+                                rootname = ''
+                            else:
+                                (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                             # sets vcs_root in case the loop through files were to end on an empty rootname
                             if vcs_root is None:
                               if rootname:
                                  vcs_root = rootname
                         # gather up files with hg for indexing
                         if filename.startswith("hg"):
                             (ver, checkout, source_file, revision) = filename.split(":", 3)
                             sourceFileStream += sourcepath + "*" + source_file + '*' + revision + "\r\n"
@@ -662,16 +686,17 @@ class Dumper_Win32(Dumper):
             pdbstr = os.path.normpath(pdbstr_path)
             subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                              "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                             cwd=os.path.dirname(stream_output_path))
             # clean up all the .stream files when done
             os.remove(stream_output_path)
         return result
 
+
 class Dumper_Linux(Dumper):
     objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'
     def ShouldProcess(self, file):
         """This function will allow processing of files that are
         executable, or end with the .so extension, and additionally
         file(1) reports as being ELF files.  It expects to find the file
         command in PATH."""
         if file.endswith(".so") or os.access(file, os.X_OK):
@@ -835,22 +860,27 @@ to canonical locations in the source rep
         exit(1)
 
     try:
         manifests = validate_install_manifests(options.install_manifests)
     except (IOError, ValueError) as e:
         parser.error(str(e))
         exit(1)
     file_mapping = make_file_mapping(manifests)
+    generated_files = {os.path.join(buildconfig.topobjdir, f): f
+                          for (f, _) in get_generated_sources()}
+    _, bucket = get_s3_region_and_bucket()
     dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                        symbol_path=args[1],
                                        copy_debug=options.copy_debug,
                                        archs=options.archs,
                                        srcdirs=options.srcdir,
                                        vcsinfo=options.vcsinfo,
                                        srcsrv=options.srcsrv,
+                                       generated_files=generated_files,
+                                       s3_bucket=bucket,
                                        file_mapping=file_mapping)
 
     dumper.Process(args[2])
 
 # run main if run directly
 if __name__ == "__main__":
     main()
--- a/toolkit/crashreporter/tools/unit-symbolstore.py
+++ b/toolkit/crashreporter/tools/unit-symbolstore.py
@@ -209,16 +209,38 @@ class TestGetVCSFilename(HelperMixin, un
         os.environ['MOZ_SOURCE_REPO'] = 'https://somewhere.com/repo'
         os.environ['MOZ_SOURCE_CHANGESET'] = 'abcdef0123456'
         os.mkdir(os.path.join(self.test_dir, '.hg'))
         filename = os.path.join(self.test_dir, 'foo.c')
         self.assertEqual('hg:somewhere.com/repo:foo.c:abcdef0123456',
                          symbolstore.GetVCSFilename(filename, [self.test_dir])[0])
 
 
+# SHA-512 of a zero-byte file
+EMPTY_SHA512 = 'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'
+
+
+class TestGeneratedFilePath(HelperMixin, unittest.TestCase):
+    def setUp(self):
+        HelperMixin.setUp(self)
+
+    def tearDown(self):
+        HelperMixin.tearDown(self)
+
+    def test_generated_file_path(self):
+        # Make an empty generated file
+        g = os.path.join(self.test_dir, 'generated')
+        rel_path = 'a/b/generated'
+        with open(g, 'wb') as f:
+            pass
+        expected = 's3:bucket:{}/{}:'.format(EMPTY_SHA512,
+                                             rel_path)
+        self.assertEqual(expected, symbolstore.get_generated_file_s3_path(g, rel_path, 'bucket'))
+
+
 if target_platform() == 'WINNT':
     class TestFixFilenameCase(HelperMixin, unittest.TestCase):
         def test_fix_filename_case(self):
             # self.test_dir is going to be 8.3 paths...
             junk = os.path.join(self.test_dir, 'x')
             with open(junk, 'wb') as o:
                 o.write('x')
             d = symbolstore.Dumper_Win32(dump_syms='dump_syms',