Bug 1270506 - [mozlint] Refactor the include/exclude path filtering algorithm, r?smacleod draft
authorAndrew Halberstadt <ahalberstadt@mozilla.com>
Thu, 05 May 2016 17:20:33 -0400
changeset 367829 195fbc63b501412d8e3a009efbcbd74b3112b232
parent 367249 d0be57e84807ce0853b2406de7ff6abb195ac898
child 367830 0e6dbc3c06e64aaa206f52af29bb67dd9b12d801
push id18366
push userahalberstadt@mozilla.com
push dateTue, 17 May 2016 13:04:02 +0000
reviewerssmacleod
bugs1270506
milestone49.0a1
Bug 1270506 - [mozlint] Refactor the include/exclude path filtering algorithm, r?smacleod The current algorithm for filtering down tests is too naive. For example, given the following directory structured: parent - foo - bar - baz And the following include/exclude directives: include = ['foo'] exclude = ['foo/bar'] Then running ./mach lint parent and ./mach lint foo/baz should both lint all files in baz but no files in bar. This provides a nice way to include/exclude directories, while allowing the underlying linters to find appropriate files to lint *within* those directories. tl;dr - Straight file paths (no globs) will be passed straight to the underlying linter as is. While paths with globs will first be resolved to a list of matching file paths. MozReview-Commit-ID: Eag48Lnkp3l
python/mozlint/mozlint/pathutils.py
python/mozlint/mozlint/types.py
new file mode 100644
--- /dev/null
+++ b/python/mozlint/mozlint/pathutils.py
@@ -0,0 +1,135 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import unicode_literals
+
+import os
+
+from mozpack import path as mozpath
+from mozpack.files import FileFinder
+
+
+class FilterPath(object):
+    """Helper class to make comparing and matching file paths easier."""
+    def __init__(self, path, exclude=None):
+        self.path = os.path.normpath(path)
+        self._finder = None
+        self.exclude = exclude
+
+    @property
+    def finder(self):
+        if self._finder:
+            return self._finder
+        self._finder = FileFinder(
+            self.path, find_executables=False, ignore=self.exclude)
+        return self._finder
+
+    @property
+    def exists(self):
+        return os.path.exists(self.path)
+
+    @property
+    def isfile(self):
+        return os.path.isfile(self.path)
+
+    @property
+    def isdir(self):
+        return os.path.isdir(self.path)
+
+    def join(self, *args):
+        return FilterPath(os.path.join(self, *args))
+
+    def match(self, patterns):
+        return any(mozpath.match(self.path, pattern.path) for pattern in patterns)
+
+    def contains(self, other):
+        """Return True if other is a subdirectory of self or equals self."""
+        if isinstance(other, FilterPath):
+            other = other.path
+        a = os.path.abspath(self.path)
+        b = os.path.normpath(os.path.abspath(other))
+
+        if b.startswith(a):
+            return True
+        return False
+
+    def __repr__(self):
+        return repr(self.path)
+
+
+def filterpaths(paths, include=None, exclude=None):
+    """Filters a list of paths.
+
+    Given a list of paths, and a list of include and exclude
+    directives, return the set of paths that should be linted.
+
+    :param paths: A starting list of paths to possibly lint.
+    :param include: A list of include directives. May contain glob patterns.
+    :param exclude: A list of exclude directives. May contain glob patterns.
+    :returns: A tuple containing a list of file paths to lint, and a list
+              of file paths that should be excluded (but that the algorithm
+              was unable to apply).
+    """
+    if not include and not exclude:
+        return paths
+
+    include = map(FilterPath, include or [])
+    exclude = map(FilterPath, exclude or [])
+
+    # Paths with and without globs will be handled separately,
+    # pull them apart now.
+    includepaths = [p for p in include if p.exists]
+    excludepaths = [p for p in exclude if p.exists]
+
+    includeglobs = [p for p in include if not p.exists]
+    excludeglobs = [p for p in exclude if not p.exists]
+
+    keep = set()
+    discard = set()
+    for path in map(FilterPath, paths):
+        # First handle include/exclude directives
+        # that exist (i.e don't have globs)
+        for inc in includepaths:
+            # Only excludes that are subdirectories of the include
+            # path matter.
+            excs = [e for e in excludepaths if inc.contains(e)]
+
+            if path.contains(inc):
+                # If specified path is an ancestor of include path,
+                # then lint the include path.
+                keep.add(inc)
+
+                # We can't apply these exclude paths without explicitly
+                # including every sibling file. Rather than do that,
+                # just return them and hope the underlying linter will
+                # deal with them.
+                discard.update(excs)
+
+            elif inc.contains(path):
+                # If the include path is an ancestor of the specified
+                # path, then add the specified path only if there are
+                # no exclude paths in-between them.
+                if not any(e.contains(path) for e in excs):
+                    keep.add(path)
+
+        # Next handle include/exclude directives that
+        # contain globs.
+        if path.isfile:
+            # If the specified path is a file it must be both
+            # matched by an include directive and not matched
+            # by an exclude directive.
+            if not path.match(includeglobs):
+                continue
+            elif path.match(excludeglobs):
+                continue
+            keep.add(path)
+        elif path.isdir:
+            # If the specified path is a directory, use a
+            # FileFinder to resolve all relevant globs.
+            path.exclude = excludeglobs
+            for pattern in includeglobs:
+                for p, f in path.finder.find(pattern.path):
+                    keep.add(path.join(p))
+
+    return ([f.path for f in keep], [f.path for f in discard])
--- a/python/mozlint/mozlint/types.py
+++ b/python/mozlint/mozlint/types.py
@@ -1,20 +1,19 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-import os
+from __future__ import unicode_literals
+
 import re
 from abc import ABCMeta, abstractmethod
 
-from mozpack import path as mozpath
-from mozpack.files import FileFinder
-
 from . import result
+from .pathutils import filterpaths
 
 
 class BaseType(object):
     """Abstract base class for all types of linters."""
     __metaclass__ = ABCMeta
     batch = False
 
     def __call__(self, paths, linter, **lintargs):
@@ -24,67 +23,31 @@ class BaseType(object):
         :param linter: Linter definition paths are being linted against.
         :param lintargs: External arguments to the linter not defined in
                          the definition, but passed in by a consumer.
         :returns: A list of :class:`~result.ResultContainer` objects.
         """
         exclude = lintargs.get('exclude', [])
         exclude.extend(linter.get('exclude', []))
 
-        paths = self._filter(paths, linter.get('include'), exclude)
+        paths, exclude = filterpaths(paths, linter.get('include'), exclude)
         if not paths:
             return
 
+        lintargs['exclude'] = exclude
         if self.batch:
             return self._lint(paths, linter, **lintargs)
 
         errors = []
         for p in paths:
             result = self._lint(p, linter, **lintargs)
             if result:
                 errors.extend(result)
         return errors
 
-    def _filter(self, paths, include=None, exclude=None):
-        if not include and not exclude:
-            return paths
-
-        if include:
-            include = map(os.path.normpath, include)
-
-        if exclude:
-            exclude = map(os.path.normpath, exclude)
-
-        def match(path, patterns):
-            return any(mozpath.match(path, pattern) for pattern in patterns)
-
-        filtered = []
-        for path in paths:
-            if os.path.isfile(path):
-                if include and not match(path, include):
-                    continue
-                elif exclude and match(path, exclude):
-                    continue
-                filtered.append(path)
-            elif os.path.isdir(path):
-                finder = FileFinder(path, find_executables=False, ignore=exclude)
-                if self.batch:
-                    # Batch means the underlying linter will be responsible for finding
-                    # matching files in the directory. Return the path as is if there
-                    # exists at least one matching file.
-                    if any(finder.contains(pattern) for pattern in include):
-                        filtered.append(path)
-                else:
-                    # Convert the directory to a list of matching files.
-                    for pattern in include:
-                        filtered.extend([os.path.join(path, p)
-                                         for p, f in finder.find(pattern)])
-
-        return filtered
-
     @abstractmethod
     def _lint(self, path):
         pass
 
 
 class LineType(BaseType):
     """Abstract base class for linter types that check each line individually.