Bug 1270506 - [mozlint] Refactor the include/exclude path filtering algorithm, r?smacleod
The current algorithm for filtering down tests is too naive. For example, given the following
directory structured:
parent
- foo
- bar
- baz
And the following include/exclude directives:
include = ['foo']
exclude = ['foo/bar']
Then running ./mach lint parent and ./mach lint foo/baz should both lint all files in baz but
no files in bar. This provides a nice way to include/exclude directories, while allowing the
underlying linters to find appropriate files to lint *within* those directories.
tl;dr - Straight file paths (no globs) will be passed straight to the underlying linter as is.
While paths with globs will first be resolved to a list of matching file paths.
MozReview-Commit-ID: Eag48Lnkp3l
new file mode 100644
--- /dev/null
+++ b/python/mozlint/mozlint/pathutils.py
@@ -0,0 +1,135 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import unicode_literals
+
+import os
+
+from mozpack import path as mozpath
+from mozpack.files import FileFinder
+
+
+class FilterPath(object):
+ """Helper class to make comparing and matching file paths easier."""
+ def __init__(self, path, exclude=None):
+ self.path = os.path.normpath(path)
+ self._finder = None
+ self.exclude = exclude
+
+ @property
+ def finder(self):
+ if self._finder:
+ return self._finder
+ self._finder = FileFinder(
+ self.path, find_executables=False, ignore=self.exclude)
+ return self._finder
+
+ @property
+ def exists(self):
+ return os.path.exists(self.path)
+
+ @property
+ def isfile(self):
+ return os.path.isfile(self.path)
+
+ @property
+ def isdir(self):
+ return os.path.isdir(self.path)
+
+ def join(self, *args):
+ return FilterPath(os.path.join(self, *args))
+
+ def match(self, patterns):
+ return any(mozpath.match(self.path, pattern.path) for pattern in patterns)
+
+ def contains(self, other):
+ """Return True if other is a subdirectory of self or equals self."""
+ if isinstance(other, FilterPath):
+ other = other.path
+ a = os.path.abspath(self.path)
+ b = os.path.normpath(os.path.abspath(other))
+
+ if b.startswith(a):
+ return True
+ return False
+
+ def __repr__(self):
+ return repr(self.path)
+
+
+def filterpaths(paths, include=None, exclude=None):
+ """Filters a list of paths.
+
+ Given a list of paths, and a list of include and exclude
+ directives, return the set of paths that should be linted.
+
+ :param paths: A starting list of paths to possibly lint.
+ :param include: A list of include directives. May contain glob patterns.
+ :param exclude: A list of exclude directives. May contain glob patterns.
+ :returns: A tuple containing a list of file paths to lint, and a list
+ of file paths that should be excluded (but that the algorithm
+ was unable to apply).
+ """
+ if not include and not exclude:
+ return paths
+
+ include = map(FilterPath, include or [])
+ exclude = map(FilterPath, exclude or [])
+
+ # Paths with and without globs will be handled separately,
+ # pull them apart now.
+ includepaths = [p for p in include if p.exists]
+ excludepaths = [p for p in exclude if p.exists]
+
+ includeglobs = [p for p in include if not p.exists]
+ excludeglobs = [p for p in exclude if not p.exists]
+
+ keep = set()
+ discard = set()
+ for path in map(FilterPath, paths):
+ # First handle include/exclude directives
+ # that exist (i.e don't have globs)
+ for inc in includepaths:
+ # Only excludes that are subdirectories of the include
+ # path matter.
+ excs = [e for e in excludepaths if inc.contains(e)]
+
+ if path.contains(inc):
+ # If specified path is an ancestor of include path,
+ # then lint the include path.
+ keep.add(inc)
+
+ # We can't apply these exclude paths without explicitly
+ # including every sibling file. Rather than do that,
+ # just return them and hope the underlying linter will
+ # deal with them.
+ discard.update(excs)
+
+ elif inc.contains(path):
+ # If the include path is an ancestor of the specified
+ # path, then add the specified path only if there are
+ # no exclude paths in-between them.
+ if not any(e.contains(path) for e in excs):
+ keep.add(path)
+
+ # Next handle include/exclude directives that
+ # contain globs.
+ if path.isfile:
+ # If the specified path is a file it must be both
+ # matched by an include directive and not matched
+ # by an exclude directive.
+ if not path.match(includeglobs):
+ continue
+ elif path.match(excludeglobs):
+ continue
+ keep.add(path)
+ elif path.isdir:
+ # If the specified path is a directory, use a
+ # FileFinder to resolve all relevant globs.
+ path.exclude = excludeglobs
+ for pattern in includeglobs:
+ for p, f in path.finder.find(pattern.path):
+ keep.add(path.join(p))
+
+ return ([f.path for f in keep], [f.path for f in discard])
--- a/python/mozlint/mozlint/types.py
+++ b/python/mozlint/mozlint/types.py
@@ -1,20 +1,19 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-import os
+from __future__ import unicode_literals
+
import re
from abc import ABCMeta, abstractmethod
-from mozpack import path as mozpath
-from mozpack.files import FileFinder
-
from . import result
+from .pathutils import filterpaths
class BaseType(object):
"""Abstract base class for all types of linters."""
__metaclass__ = ABCMeta
batch = False
def __call__(self, paths, linter, **lintargs):
@@ -24,67 +23,31 @@ class BaseType(object):
:param linter: Linter definition paths are being linted against.
:param lintargs: External arguments to the linter not defined in
the definition, but passed in by a consumer.
:returns: A list of :class:`~result.ResultContainer` objects.
"""
exclude = lintargs.get('exclude', [])
exclude.extend(linter.get('exclude', []))
- paths = self._filter(paths, linter.get('include'), exclude)
+ paths, exclude = filterpaths(paths, linter.get('include'), exclude)
if not paths:
return
+ lintargs['exclude'] = exclude
if self.batch:
return self._lint(paths, linter, **lintargs)
errors = []
for p in paths:
result = self._lint(p, linter, **lintargs)
if result:
errors.extend(result)
return errors
- def _filter(self, paths, include=None, exclude=None):
- if not include and not exclude:
- return paths
-
- if include:
- include = map(os.path.normpath, include)
-
- if exclude:
- exclude = map(os.path.normpath, exclude)
-
- def match(path, patterns):
- return any(mozpath.match(path, pattern) for pattern in patterns)
-
- filtered = []
- for path in paths:
- if os.path.isfile(path):
- if include and not match(path, include):
- continue
- elif exclude and match(path, exclude):
- continue
- filtered.append(path)
- elif os.path.isdir(path):
- finder = FileFinder(path, find_executables=False, ignore=exclude)
- if self.batch:
- # Batch means the underlying linter will be responsible for finding
- # matching files in the directory. Return the path as is if there
- # exists at least one matching file.
- if any(finder.contains(pattern) for pattern in include):
- filtered.append(path)
- else:
- # Convert the directory to a list of matching files.
- for pattern in include:
- filtered.extend([os.path.join(path, p)
- for p, f in finder.find(pattern)])
-
- return filtered
-
@abstractmethod
def _lint(self, path):
pass
class LineType(BaseType):
"""Abstract base class for linter types that check each line individually.