Bug 1401309 - [mozversioncontrol] Merge get_modified_files and get_added_files into a single function, r?gps draft
authorAndrew Halberstadt <ahalberstadt@mozilla.com>
Wed, 20 Sep 2017 10:06:11 -0400
changeset 671956 519bd5b9386eaa4cb87101e55a87fb5aa75741bb
parent 671955 46ef33cade91b5bf31d1e1ed1aa03b536cc070ac
child 671957 770fe8a9981a39d2891913b297710e6b676a4c6f
push id82100
push userahalberstadt@mozilla.com
push dateThu, 28 Sep 2017 14:54:56 +0000
reviewersgps
bugs1401309
milestone58.0a1
Bug 1401309 - [mozversioncontrol] Merge get_modified_files and get_added_files into a single function, r?gps There's currently a function for getting added files (A) and modified files (M). We'll also eventually need the ability to get deleted files (D) and any combination of the above, e.g (AM). Rather than creating a new function for each possible case, let's have a single function where you can pass in which modifiers you are interested in. With this patch, if you want all modified and added files, you can do: get_changed_files('AM') By default 'ADM' is used. This also adds a 'mode' option for git. This allows consumers to return staged files, unstaged files or both. The default ('unstaged') keeps the current behaviour in tact. MozReview-Commit-ID: 9IA1bxaJS80
python/mozbuild/mozbuild/vendor_aom.py
python/mozbuild/mozbuild/vendor_rust.py
python/mozversioncontrol/mozversioncontrol/__init__.py
--- a/python/mozbuild/mozbuild/vendor_aom.py
+++ b/python/mozbuild/mozbuild/vendor_aom.py
@@ -195,17 +195,17 @@ Please check manually and update the ven
                          cwd=target)
 
     def check_modified_files(self):
         '''
         Ensure that there aren't any uncommitted changes to files
         in the working copy, since we're going to change some state
         on the user.
         '''
-        modified = self.repository.get_modified_files()
+        modified = self.repository.get_changed_files('M')
         if modified:
             self.log(logging.ERROR, 'modified_files', {},
                      '''You have uncommitted changes to the following files:
 
 {files}
 
 Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
 '''.format(files='\n'.join(sorted(modified))))
--- a/python/mozbuild/mozbuild/vendor_rust.py
+++ b/python/mozbuild/mozbuild/vendor_rust.py
@@ -55,17 +55,17 @@ class VendorRust(MozbuildObject):
 
     def check_modified_files(self):
         '''
         Ensure that there aren't any uncommitted changes to files
         in the working copy, since we're going to change some state
         on the user. Allow changes to Cargo.{toml,lock} since that's
         likely to be a common use case.
         '''
-        modified = [f for f in self.repository.get_modified_files() if os.path.basename(f) not in ('Cargo.toml', 'Cargo.lock')]
+        modified = [f for f in self.repository.get_changed_files('M') if os.path.basename(f) not in ('Cargo.toml', 'Cargo.lock')]
         if modified:
             self.log(logging.ERROR, 'modified_files', {},
                      '''You have uncommitted changes to the following files:
 
 {files}
 
 Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
 '''.format(files='\n'.join(sorted(modified))))
@@ -291,17 +291,17 @@ license file's hash.
             sys.exit(1)
 
         self.repository.add_remove_files(vendor_dir)
 
         # 100k is a reasonable upper bound on source file size.
         FILESIZE_LIMIT = 100 * 1024
         large_files = set()
         cumulative_added_size = 0
-        for f in self.repository.get_added_files():
+        for f in self.repository.get_changed_files('A'):
             path = mozpath.join(self.topsrcdir, f)
             size = os.stat(path).st_size
             cumulative_added_size += size
             if size > FILESIZE_LIMIT:
                 large_files.add(f)
 
         # Forcefully complain about large files being added, as history has
         # shown that large-ish files typically are not needed.
--- a/python/mozversioncontrol/mozversioncontrol/__init__.py
+++ b/python/mozversioncontrol/mozversioncontrol/__init__.py
@@ -53,16 +53,17 @@ class Repository(object):
 
     __metaclass__ = abc.ABCMeta
 
     def __init__(self, path, tool):
         self.path = os.path.abspath(path)
         self._tool = get_tool_path(tool)
         self._env = os.environ.copy()
         self._version = None
+        self._valid_diff_filter = ('m', 'a', 'd')
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_value, exc_tb):
         pass
 
     def _run(self, *args):
@@ -93,24 +94,32 @@ class Repository(object):
 
         A sparse checkout is defined as a working directory that only
         materializes a subset of files in a given revision.
 
         Returns a bool.
         """
 
     @abc.abstractmethod
-    def get_modified_files(self):
-        '''Return a list of files that are modified in this repository's
-        working copy.'''
+    def get_changed_files(self, diff_filter, mode='unstaged'):
+        """Return a list of files that are changed in this repository's
+        working copy.
+
+        ``diff_filter`` controls which kinds of modifications are returned.
+        It is a string which may only contain the following characters:
 
-    @abc.abstractmethod
-    def get_added_files(self):
-        '''Return a list of files that are added in this repository's
-        working copy.'''
+            A - Include files that were added
+            D - Include files that were deleted
+            M - Include files that were modified
+
+        By default, all three will be included.
+
+        ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an
+        affect on git. Defaults to 'unstaged'.
+        """
 
     @abc.abstractmethod
     def add_remove_files(self, path):
         '''Add and remove files under `path` in this repository's working copy.
         '''
 
     @abc.abstractmethod
     def forget_add_remove_files(self, path):
@@ -191,23 +200,31 @@ class HgRepository(Repository):
             st = os.stat(sparse)
             return st.st_size > 0
         except OSError as e:
             if e.errno != errno.ENOENT:
                 raise
 
             return False
 
-    def get_modified_files(self):
-        # Use --no-status to print just the filename.
-        return self._run('status', '--modified', '--no-status').splitlines()
+    def _format_diff_filter(self, diff_filter):
+        df = diff_filter.lower()
+        assert all(f in self._valid_diff_filter for f in df)
+
+        # Mercurial uses 'r' to denote removed files whereas git uses 'd'.
+        if 'd' in df:
+            df.replace('d', 'r')
 
-    def get_added_files(self):
+        return df.lower()
+
+    def get_changed_files(self, diff_filter='ADM', mode='unstaged'):
+        df = self._format_diff_filter(diff_filter)
+
         # Use --no-status to print just the filename.
-        return self._run('status', '--added', '--no-status').splitlines()
+        return self._run('status', '--no-status', '-{}'.format(df)).splitlines()
 
     def add_remove_files(self, path):
         args = ['addremove', path]
         if self.tool_version >= b'3.9':
             args = ['--config', 'extensions.automv='] + args
         self._run(*args)
 
     def forget_add_remove_files(self, path):
@@ -240,21 +257,26 @@ class GitRepository(Repository):
     @property
     def name(self):
         return 'git'
 
     def sparse_checkout_present(self):
         # Not yet implemented.
         return False
 
-    def get_modified_files(self):
-        return self._run('diff', '--diff-filter=M', '--name-only').splitlines()
+    def get_changed_files(self, diff_filter='ADM', mode='unstaged'):
+        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
 
-    def get_added_files(self):
-        return self._run('diff', '--diff-filter=A', '--name-only').splitlines()
+        cmd = ['diff', '--diff-filter={}'.format(diff_filter.upper()), '--name-only']
+        if mode == 'staged':
+            cmd.append('--cached')
+        elif mode == 'all':
+            cmd.append('HEAD')
+
+        return self._run(*cmd).splitlines()
 
     def add_remove_files(self, path):
         self._run('add', path)
 
     def forget_add_remove_files(self, path):
         self._run('reset', path)
 
     def get_files_in_working_directory(self):