Bug 1401309 - [mozversioncontrol] Merge get_modified_files and get_added_files into a single function, r?gps
There's currently a function for getting added files (A) and modified files
(M). We'll also eventually need the ability to get deleted files (D) and any
combination of the above, e.g (AM). Rather than creating a new function for
each possible case, let's have a single function where you can pass in which
modifiers you are interested in. With this patch, if you want all modified and
added files, you can do:
get_changed_files('AM')
By default 'ADM' is used.
This also adds a 'mode' option for git. This allows consumers to return staged
files, unstaged files or both. The default ('unstaged') keeps the current
behaviour in tact.
MozReview-Commit-ID: 9IA1bxaJS80
--- a/python/mozbuild/mozbuild/vendor_aom.py
+++ b/python/mozbuild/mozbuild/vendor_aom.py
@@ -195,17 +195,17 @@ Please check manually and update the ven
cwd=target)
def check_modified_files(self):
'''
Ensure that there aren't any uncommitted changes to files
in the working copy, since we're going to change some state
on the user.
'''
- modified = self.repository.get_modified_files()
+ modified = self.repository.get_changed_files('M')
if modified:
self.log(logging.ERROR, 'modified_files', {},
'''You have uncommitted changes to the following files:
{files}
Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
'''.format(files='\n'.join(sorted(modified))))
--- a/python/mozbuild/mozbuild/vendor_rust.py
+++ b/python/mozbuild/mozbuild/vendor_rust.py
@@ -55,17 +55,17 @@ class VendorRust(MozbuildObject):
def check_modified_files(self):
'''
Ensure that there aren't any uncommitted changes to files
in the working copy, since we're going to change some state
on the user. Allow changes to Cargo.{toml,lock} since that's
likely to be a common use case.
'''
- modified = [f for f in self.repository.get_modified_files() if os.path.basename(f) not in ('Cargo.toml', 'Cargo.lock')]
+ modified = [f for f in self.repository.get_changed_files('M') if os.path.basename(f) not in ('Cargo.toml', 'Cargo.lock')]
if modified:
self.log(logging.ERROR, 'modified_files', {},
'''You have uncommitted changes to the following files:
{files}
Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
'''.format(files='\n'.join(sorted(modified))))
@@ -291,17 +291,17 @@ license file's hash.
sys.exit(1)
self.repository.add_remove_files(vendor_dir)
# 100k is a reasonable upper bound on source file size.
FILESIZE_LIMIT = 100 * 1024
large_files = set()
cumulative_added_size = 0
- for f in self.repository.get_added_files():
+ for f in self.repository.get_changed_files('A'):
path = mozpath.join(self.topsrcdir, f)
size = os.stat(path).st_size
cumulative_added_size += size
if size > FILESIZE_LIMIT:
large_files.add(f)
# Forcefully complain about large files being added, as history has
# shown that large-ish files typically are not needed.
--- a/python/mozversioncontrol/mozversioncontrol/__init__.py
+++ b/python/mozversioncontrol/mozversioncontrol/__init__.py
@@ -53,16 +53,17 @@ class Repository(object):
__metaclass__ = abc.ABCMeta
def __init__(self, path, tool):
self.path = os.path.abspath(path)
self._tool = get_tool_path(tool)
self._env = os.environ.copy()
self._version = None
+ self._valid_diff_filter = ('m', 'a', 'd')
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, exc_tb):
pass
def _run(self, *args):
@@ -93,24 +94,32 @@ class Repository(object):
A sparse checkout is defined as a working directory that only
materializes a subset of files in a given revision.
Returns a bool.
"""
@abc.abstractmethod
- def get_modified_files(self):
- '''Return a list of files that are modified in this repository's
- working copy.'''
+ def get_changed_files(self, diff_filter, mode='unstaged'):
+ """Return a list of files that are changed in this repository's
+ working copy.
+
+ ``diff_filter`` controls which kinds of modifications are returned.
+ It is a string which may only contain the following characters:
- @abc.abstractmethod
- def get_added_files(self):
- '''Return a list of files that are added in this repository's
- working copy.'''
+ A - Include files that were added
+ D - Include files that were deleted
+ M - Include files that were modified
+
+ By default, all three will be included.
+
+ ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an
+ affect on git. Defaults to 'unstaged'.
+ """
@abc.abstractmethod
def add_remove_files(self, path):
'''Add and remove files under `path` in this repository's working copy.
'''
@abc.abstractmethod
def forget_add_remove_files(self, path):
@@ -191,23 +200,31 @@ class HgRepository(Repository):
st = os.stat(sparse)
return st.st_size > 0
except OSError as e:
if e.errno != errno.ENOENT:
raise
return False
- def get_modified_files(self):
- # Use --no-status to print just the filename.
- return self._run('status', '--modified', '--no-status').splitlines()
+ def _format_diff_filter(self, diff_filter):
+ df = diff_filter.lower()
+ assert all(f in self._valid_diff_filter for f in df)
+
+ # Mercurial uses 'r' to denote removed files whereas git uses 'd'.
+ if 'd' in df:
+ df.replace('d', 'r')
- def get_added_files(self):
+ return df.lower()
+
+ def get_changed_files(self, diff_filter='ADM', mode='unstaged'):
+ df = self._format_diff_filter(diff_filter)
+
# Use --no-status to print just the filename.
- return self._run('status', '--added', '--no-status').splitlines()
+ return self._run('status', '--no-status', '-{}'.format(df)).splitlines()
def add_remove_files(self, path):
args = ['addremove', path]
if self.tool_version >= b'3.9':
args = ['--config', 'extensions.automv='] + args
self._run(*args)
def forget_add_remove_files(self, path):
@@ -240,21 +257,26 @@ class GitRepository(Repository):
@property
def name(self):
return 'git'
def sparse_checkout_present(self):
# Not yet implemented.
return False
- def get_modified_files(self):
- return self._run('diff', '--diff-filter=M', '--name-only').splitlines()
+ def get_changed_files(self, diff_filter='ADM', mode='unstaged'):
+ assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
- def get_added_files(self):
- return self._run('diff', '--diff-filter=A', '--name-only').splitlines()
+ cmd = ['diff', '--diff-filter={}'.format(diff_filter.upper()), '--name-only']
+ if mode == 'staged':
+ cmd.append('--cached')
+ elif mode == 'all':
+ cmd.append('HEAD')
+
+ return self._run(*cmd).splitlines()
def add_remove_files(self, path):
self._run('add', path)
def forget_add_remove_files(self, path):
self._run('reset', path)
def get_files_in_working_directory(self):