Bug 1353461 - [manifestparser] Implement a chunk_by_manifest algorithm, r?jmaher draft
authorAndrew Halberstadt <ahalberstadt@mozilla.com>
Tue, 13 Feb 2018 15:16:37 -0500
changeset 759224 bd8a7c41f61e27e311a35fd0f1535853742e9fae
parent 759182 ad3c6f89d67752309a473e57a47fb88f9da37683
child 759225 160501b1348a8a404072135fc88cf9f6617842d0
push id100313
push userahalberstadt@mozilla.com
push dateFri, 23 Feb 2018 22:02:04 +0000
reviewersjmaher
bugs1353461
milestone60.0a1
Bug 1353461 - [manifestparser] Implement a chunk_by_manifest algorithm, r?jmaher This implements a chunk_by_manifest algorithm. It is similar to chunk_by_slice in that it tries to make an even number of tests run in each chunk. However, unlike chunk_by_slice it will guarantee that tests in the same manifest will all run in the same chunk. This makes it suitable to use with run-by-manifest. This means the chunks won't be perfect (as manifests are differnet sizes). It is also prone to more randomization, similar to chunk-by-runtime. In fact, this algorithm is nearly identical to the chunk-by-runtime one, so it was refactored out to a base class. MozReview-Commit-ID: HI2ByxW0i8V
layout/tools/reftest/runreftest.py
testing/mozbase/manifestparser/manifestparser/filters.py
--- a/layout/tools/reftest/runreftest.py
+++ b/layout/tools/reftest/runreftest.py
@@ -823,17 +823,17 @@ class RefTest(object):
             # Name and path are expected by manifestparser, but not used in reftest.
             test['name'] = test['path'] = test['url1']
 
         mp = TestManifest(strict=False)
         mp.tests = tests
 
         filters = []
         if options.totalChunks:
-            filters.append(mpf.chunk_by_slice(options.thisChunk, options.totalChunks))
+            filters.append(mpf.chunk_by_manifest(options.thisChunk, options.totalChunks))
 
         tests = mp.active_tests(exists=False, filters=filters)
         return tests
 
     def runSerialTests(self, manifests, options, cmdargs=None):
         debuggerInfo = None
         if options.debugger:
             debuggerInfo = mozdebug.get_debugger_info(options.debugger, options.debuggerArgs,
--- a/testing/mozbase/manifestparser/manifestparser/filters.py
+++ b/testing/mozbase/manifestparser/manifestparser/filters.py
@@ -5,19 +5,20 @@
 """
 A filter is a callable that accepts an iterable of test objects and a
 dictionary of values, and returns a new iterable of test objects. It is
 possible to define custom filters if the built-in ones are not enough.
 """
 
 from __future__ import absolute_import
 
-from collections import defaultdict, MutableSequence
 import itertools
 import os
+from abc import ABCMeta, abstractmethod
+from collections import defaultdict, MutableSequence
 
 from .expression import (
     parse,
     ParseError,
 )
 
 
 # built-in filters
@@ -252,66 +253,91 @@ class chunk_by_dir(InstanceFilter):
         # simplicity.
         if self.this_chunk == 1:
             disabled_dirs = [v for k, v in tests_by_dir.iteritems()
                              if k not in ordered_dirs]
             for disabled_test in itertools.chain(*disabled_dirs):
                 yield disabled_test
 
 
-class chunk_by_runtime(InstanceFilter):
+class ManifestChunk(InstanceFilter):
+    """
+    Base class for chunking tests by manifest using a numerical key.
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(self, this_chunk, total_chunks, *args, **kwargs):
+        InstanceFilter.__init__(self, this_chunk, total_chunks, *args, **kwargs)
+        self.this_chunk = this_chunk
+        self.total_chunks = total_chunks
+
+    @abstractmethod
+    def key(self, tests):
+        pass
+
+    def __call__(self, tests, values):
+        tests = list(tests)
+        manifests = set(t['manifest'] for t in tests)
+
+        tests_by_manifest = []
+        for manifest in manifests:
+            mtests = [t for t in tests if t['manifest'] == manifest]
+            tests_by_manifest.append((self.key(mtests), mtests))
+        tests_by_manifest.sort(reverse=True)
+
+        tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
+        for key, batch in tests_by_manifest:
+            # sort first by key, then by number of tests in case of a tie.
+            # This guarantees the chunk with the lowest key will always
+            # get the next batch of tests.
+            tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
+            tests_by_chunk[0][0] += key
+            tests_by_chunk[0][1].extend(batch)
+
+        return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+
+
+class chunk_by_manifest(ManifestChunk):
+    """
+    Chunking algorithm that tries to evenly distribute tests while ensuring
+    tests in the same manifest stay together.
+
+    :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
+    :param total_chunks: the total number of chunks
+    """
+    def key(self, tests):
+        return len(tests)
+
+
+class chunk_by_runtime(ManifestChunk):
     """
     Chunking algorithm that attempts to group tests into chunks based on their
     average runtimes. It keeps manifests of tests together and pairs slow
     running manifests with fast ones.
 
     :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
     :param total_chunks: the total number of chunks
     :param runtimes: dictionary of test runtime data, of the form
                      {<test path>: <average runtime>}
     :param default_runtime: value in seconds to assign tests that don't exist
                             in the runtimes file
     """
 
     def __init__(self, this_chunk, total_chunks, runtimes, default_runtime=0):
-        InstanceFilter.__init__(self, this_chunk, total_chunks, runtimes,
-                                default_runtime=default_runtime)
-        self.this_chunk = this_chunk
-        self.total_chunks = total_chunks
-
+        ManifestChunk.__init__(self, this_chunk, total_chunks, runtimes,
+                               default_runtime=default_runtime)
         # defaultdict(lambda:<int>) assigns all non-existent keys the value of
         # <int>. This means all tests we encounter that don't exist in the
         # runtimes file will be assigned `default_runtime`.
         self.runtimes = defaultdict(lambda: default_runtime)
         self.runtimes.update(runtimes)
 
-    def __call__(self, tests, values):
-        tests = list(tests)
-        manifests = set(t['manifest'] for t in tests)
-
-        def total_runtime(tests):
-            return sum(self.runtimes[t['relpath']] for t in tests
-                       if 'disabled' not in t)
-
-        tests_by_manifest = []
-        for manifest in manifests:
-            mtests = [t for t in tests if t['manifest'] == manifest]
-            tests_by_manifest.append((total_runtime(mtests), mtests))
-        tests_by_manifest.sort(reverse=True)
-
-        tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
-        for runtime, batch in tests_by_manifest:
-            # sort first by runtime, then by number of tests in case of a tie.
-            # This guarantees the chunk with the fastest runtime will always
-            # get the next batch of tests.
-            tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
-            tests_by_chunk[0][0] += runtime
-            tests_by_chunk[0][1].extend(batch)
-
-        return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+    def key(self, tests):
+        return sum(self.runtimes[t['relpath']] for t in tests
+                   if 'disabled' not in t)
 
 
 class tags(InstanceFilter):
     """
     Removes tests that don't contain any of the given tags. This overrides
     InstanceFilter's __eq__ method, so multiple instances can be added.
     Multiple tag filters is equivalent to joining tags with the AND operator.