Bug 1361172 - Rewrite code for finding files in VCS checkout; r?glandium draft
authorGregory Szorc <gps@mozilla.com>
Thu, 18 May 2017 16:06:49 -0700
changeset 582712 f2702e9a73cdc176c6fdefda6d40ff20d232b753
parent 582482 f9ca97a334296facd2e0ea5582e7f12d0fe70fe4
child 629829 09e4e07eac5ba2bc7b908ff262baaf57c59895ee
push id60148
push userbmo:gps@mozilla.com
push dateMon, 22 May 2017 23:56:27 +0000
reviewersglandium
bugs1361172
milestone55.0a1
Bug 1361172 - Rewrite code for finding files in VCS checkout; r?glandium We're getting an intermittent failure running `hg manifest` in CI. I have no clue why. What I do know is that we now have the mozversioncontrol Python package for containing utility code for interacting with version control. It is slightly more robust and I'm willing to support it more than I am check_utils.py. This commit adds a new API to our abstract repository class to obtain the files in the working directory by querying version control. Since I suspect cwd was coming into play in automation, I've also added a utility function to mozversioncontrol to attempt to find a version control checkout from the current working directory. It simply traces ancestor paths looking for a .hg or .git directory. Finally, I've ported all callers of the now-deleted API to the new one. The old code had some "../.." paths in it, meaning it only worked when cwd was just right. Since we resolve the absolute path to the checkout and store it on the repo object, I've updated the code so it should work no matter what cwd is as long as a repo can be found. I'm not 100% confident I found all consumers assuming cwd. But it's a start. I'm not 100% confident this will fix the intermittent issues in CI. But at least we should get a better error message and at least we'll be running less hacky code. MozReview-Commit-ID: AmCraHXcTEX
config/check_js_msg_encoding.py
config/check_macroassembler_style.py
config/check_spidermonkey_style.py
config/check_utils.py
python/mozversioncontrol/mozversioncontrol/__init__.py
--- a/config/check_js_msg_encoding.py
+++ b/config/check_js_msg_encoding.py
@@ -8,17 +8,19 @@
 #
 # JSErrorFormatString.format member should be in ASCII encoding.
 #----------------------------------------------------------------------------
 
 from __future__ import print_function
 
 import os
 import sys
-from check_utils import get_all_toplevel_filenames
+
+from mozversioncontrol import get_repository_from_env
+
 
 scriptname = os.path.basename(__file__);
 expected_encoding = 'ascii'
 
 # The following files don't define JSErrorFormatString.
 ignore_files = [
     'dom/base/domerr.msg',
     'js/xpconnect/src/xpc.msg',
@@ -40,20 +42,23 @@ def check_single_file(filename):
             log_fail(filename, 'not in {} encoding'.format(expected_encoding))
 
     log_pass(filename, 'ok')
     return True
 
 def check_files():
     result = True
 
-    for filename in get_all_toplevel_filenames():
+    repo = get_repository_from_env()
+    root = repo.path
+
+    for filename in repo.get_files_in_working_directory():
         if filename.endswith('.msg'):
             if filename not in ignore_files:
-                if not check_single_file(filename):
+                if not check_single_file(os.path.join(root, filename)):
                     result = False
 
     return result
 
 def main():
     if not check_files():
         sys.exit(1)
 
--- a/config/check_macroassembler_style.py
+++ b/config/check_macroassembler_style.py
@@ -20,19 +20,20 @@
 # proper methods annotations.
 #----------------------------------------------------------------------------
 
 from __future__ import print_function
 
 import difflib
 import os
 import re
-import subprocess
 import sys
-from check_utils import get_all_toplevel_filenames
+
+from mozversioncontrol import get_repository_from_env
+
 
 architecture_independent = set([ 'generic' ])
 all_architecture_names = set([ 'x86', 'x64', 'arm', 'arm64', 'mips32', 'mips64' ])
 all_shared_architecture_names = set([ 'x86_shared', 'mips_shared', 'arm', 'arm64' ])
 
 reBeforeArg = "(?<=[(,\s])"
 reArgType = "(?P<type>[\w\s:*&]+)"
 reArgName = "(?P<name>\s\w+)"
@@ -126,17 +127,17 @@ def get_macroassembler_definitions(filen
         fileAnnot = get_file_annotation(filename)
     except:
         return []
 
     style_section = False
     code_section = False
     lines = ''
     signatures = []
-    with open(os.path.join('../..', filename)) as f:
+    with open(filename) as f:
         for line in f:
             if '//{{{ check_macroassembler_style' in line:
                 style_section = True
             elif '//}}} check_macroassembler_style' in line:
                 style_section = False
             if not style_section:
                 continue
 
@@ -166,17 +167,17 @@ def get_macroassembler_definitions(filen
                 continue
 
     return signatures
 
 def get_macroassembler_declaration(filename):
     style_section = False
     lines = ''
     signatures = []
-    with open(os.path.join('../..', filename)) as f:
+    with open(filename) as f:
         for line in f:
             if '//{{{ check_macroassembler_style' in line:
                 style_section = True
             elif '//}}} check_macroassembler_style' in line:
                 style_section = False
             if not style_section:
                 continue
 
@@ -233,23 +234,27 @@ def generate_file_content(signatures):
 
 def check_style():
     # We read from the header file the signature of each function.
     decls = dict()      # type: dict(signature => ['x86', 'x64'])
 
     # We infer from each file the signature of each MacroAssembler function.
     defs = dict()       # type: dict(signature => ['x86', 'x64'])
 
+    repo = get_repository_from_env()
+
     # Select the appropriate files.
-    for filename in get_all_toplevel_filenames():
+    for filename in repo.get_files_in_working_directory():
         if not filename.startswith('js/src/jit/'):
             continue
         if 'MacroAssembler' not in filename:
             continue
 
+        filename = os.path.join(repo.path, filename)
+
         if filename.endswith('MacroAssembler.h'):
             decls = append_signatures(decls, get_macroassembler_declaration(filename))
         else:
             defs = append_signatures(defs, get_macroassembler_definitions(filename))
 
     # Compare declarations and definitions output.
     difflines = difflib.unified_diff(generate_file_content(decls),
                                      generate_file_content(defs),
--- a/config/check_spidermonkey_style.py
+++ b/config/check_spidermonkey_style.py
@@ -35,20 +35,20 @@
 #   isolation, but don't try to do any order checking between such blocks.
 #----------------------------------------------------------------------------
 
 from __future__ import print_function
 
 import difflib
 import os
 import re
-import subprocess
 import sys
-import traceback
-from check_utils import get_all_toplevel_filenames
+
+from mozversioncontrol import get_repository_from_env
+
 
 # We don't bother checking files in these directories, because they're (a) auxiliary or (b)
 # imported code that doesn't follow our coding style.
 ignored_js_src_dirs = [
    'js/src/config/',            # auxiliary stuff
    'js/src/ctypes/libffi/',     # imported code
    'js/src/devtools/',          # auxiliary stuff
    'js/src/editline/',          # imported code
@@ -244,18 +244,20 @@ def check_style():
     # - "js/src/vm/String.h"        -> "vm/String.h"
 
     non_js_dirnames = ('mfbt/',
                        'memory/mozalloc/',
                        'mozglue/')  # type: tuple(str)
     non_js_inclnames = set()        # type: set(inclname)
     js_names = dict()               # type: dict(filename, inclname)
 
+    repo = get_repository_from_env()
+
     # Select the appropriate files.
-    for filename in get_all_toplevel_filenames():
+    for filename in repo.get_files_in_working_directory():
         for non_js_dir in non_js_dirnames:
             if filename.startswith(non_js_dir) and filename.endswith('.h'):
                 inclname = 'mozilla/' + filename.split('/')[-1]
                 non_js_inclnames.add(inclname)
 
         if filename.startswith('js/public/') and filename.endswith('.h'):
             inclname = 'js/' + filename[len('js/public/'):]
             js_names[filename] = inclname
@@ -280,17 +282,17 @@ def check_style():
         inclname = js_names[filename]
         file_kind = FileKind.get(filename)
         if file_kind == FileKind.C or file_kind == FileKind.CPP or \
            file_kind == FileKind.H or file_kind == FileKind.INL_H:
             included_h_inclnames = set()    # type: set(inclname)
 
             # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla
             # source tree.
-            with open(os.path.join('../..', filename)) as f:
+            with open(os.path.join(repo.path, filename)) as f:
                 do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames)
 
         edges[inclname] = included_h_inclnames
 
     find_cycles(all_inclnames, edges)
 
     # Compare expected and actual output.
     difflines = difflib.unified_diff(expected_output, actual_output,
deleted file mode 100644
--- a/config/check_utils.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# vim: set ts=8 sts=4 et sw=4 tw=99:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-import subprocess
-
-def get_all_toplevel_filenames():
-    '''Get a list of all the files in the (Mercurial or Git) repository.'''
-    failed_cmds = []
-    try:
-        cmd = ['hg', 'manifest', '-q']
-        all_filenames = subprocess.check_output(cmd, universal_newlines=True,
-                                                stderr=subprocess.PIPE).split('\n')
-        return all_filenames
-    except:
-        failed_cmds.append(cmd)
-
-    try:
-        # Get the relative path to the top-level directory.
-        cmd = ['git', 'rev-parse', '--show-cdup']
-        top_level = subprocess.check_output(cmd, universal_newlines=True,
-                                                stderr=subprocess.PIPE).split('\n')[0]
-        cmd = ['git', 'ls-files', '--full-name', top_level]
-        all_filenames = subprocess.check_output(cmd, universal_newlines=True,
-                                                stderr=subprocess.PIPE).split('\n')
-        return all_filenames
-    except:
-        failed_cmds.append(cmd)
-
-    raise Exception('failed to run any of the repo manifest commands', failed_cmds)
--- a/python/mozversioncontrol/mozversioncontrol/__init__.py
+++ b/python/mozversioncontrol/mozversioncontrol/__init__.py
@@ -71,16 +71,21 @@ class Repository(object):
         '''
         raise NotImplementedError
 
     def forget_add_remove_files(self, path):
         '''Undo the effects of a previous add_remove_files call for `path`.
         '''
         raise NotImplementedError
 
+    def get_files_in_working_directory(self):
+        """Obtain a list of managed files in the working directory."""
+        raise NotImplementedError
+
+
 class HgRepository(Repository):
     '''An implementation of `Repository` for Mercurial repositories.'''
     def __init__(self, path):
         super(HgRepository, self).__init__(path, 'hg')
         self._env[b'HGPLAIN'] = b'1'
 
     def get_modified_files(self):
         # Use --no-status to print just the filename.
@@ -94,16 +99,22 @@ class HgRepository(Repository):
         args = ['addremove', path]
         if self.tool_version >= b'3.9':
             args = ['--config', 'extensions.automv='] + args
         self._run(*args)
 
     def forget_add_remove_files(self, path):
         self._run('forget', path)
 
+    def get_files_in_working_directory(self):
+        # Can return backslashes on Windows. Normalize to forward slashes.
+        return list(p.replace('\\', '/') for p in
+                    self._run('files', '-0').split('\0'))
+
+
 class GitRepository(Repository):
     '''An implementation of `Repository` for Git repositories.'''
     def __init__(self, path):
         super(GitRepository, self).__init__(path, 'git')
 
     def get_modified_files(self):
         return self._run('diff', '--diff-filter=M', '--name-only').splitlines()
 
@@ -111,18 +122,46 @@ class GitRepository(Repository):
         return self._run('diff', '--diff-filter=A', '--name-only').splitlines()
 
     def add_remove_files(self, path):
         self._run('add', path)
 
     def forget_add_remove_files(self, path):
         self._run('reset', path)
 
+    def get_files_in_working_directory(self):
+        return self._run('ls-files', '-z').split('\0')
+
+
+class InvalidRepoPath(Exception):
+    """Represents a failure to find a VCS repo at a specified path."""
+
+
 def get_repository_object(path):
     '''Get a repository object for the repository at `path`.
     If `path` is not a known VCS repository, raise an exception.
     '''
     if os.path.isdir(os.path.join(path, '.hg')):
         return HgRepository(path)
     elif os.path.exists(os.path.join(path, '.git')):
         return GitRepository(path)
     else:
-        raise Exception('Unknown VCS, or not a source checkout: %s' % path)
+        raise InvalidRepoPath('Unknown VCS, or not a source checkout: %s' %
+                              path)
+
+
+def get_repository_from_env():
+    """Obtain a repository object by looking at the environment."""
+    def ancestors(path):
+        while path:
+            yield path
+            path, child = os.path.split(path)
+            if child == '':
+                break
+
+    for path in ancestors(os.getcwd()):
+        try:
+            return get_repository_object(path)
+        except InvalidRepoPath:
+            continue
+
+    raise Exception('Could not find Mercurial or Git checkout for %s' %
+                    os.getcwd())