Bug 1353461 - [manifestparser] Implement a chunk_by_manifest algorithm, r?jmaher
This implements a chunk_by_manifest algorithm. It is similar to chunk_by_slice
in that it tries to make an even number of tests run in each chunk. However,
unlike chunk_by_slice it will guarantee that tests in the same manifest will
all run in the same chunk. This makes it suitable to use with run-by-manifest.
This means the chunks won't be perfect (as manifests are differnet sizes). It
is also prone to more randomization, similar to chunk-by-runtime.
In fact, this algorithm is nearly identical to the chunk-by-runtime one, so it
was refactored out to a base class.
MozReview-Commit-ID: HI2ByxW0i8V
--- a/layout/tools/reftest/runreftest.py
+++ b/layout/tools/reftest/runreftest.py
@@ -823,17 +823,17 @@ class RefTest(object):
# Name and path are expected by manifestparser, but not used in reftest.
test['name'] = test['path'] = test['url1']
mp = TestManifest(strict=False)
mp.tests = tests
filters = []
if options.totalChunks:
- filters.append(mpf.chunk_by_slice(options.thisChunk, options.totalChunks))
+ filters.append(mpf.chunk_by_manifest(options.thisChunk, options.totalChunks))
tests = mp.active_tests(exists=False, filters=filters)
return tests
def runSerialTests(self, manifests, options, cmdargs=None):
debuggerInfo = None
if options.debugger:
debuggerInfo = mozdebug.get_debugger_info(options.debugger, options.debuggerArgs,
--- a/testing/mozbase/manifestparser/manifestparser/filters.py
+++ b/testing/mozbase/manifestparser/manifestparser/filters.py
@@ -5,19 +5,20 @@
"""
A filter is a callable that accepts an iterable of test objects and a
dictionary of values, and returns a new iterable of test objects. It is
possible to define custom filters if the built-in ones are not enough.
"""
from __future__ import absolute_import
-from collections import defaultdict, MutableSequence
import itertools
import os
+from abc import ABCMeta, abstractmethod
+from collections import defaultdict, MutableSequence
from .expression import (
parse,
ParseError,
)
# built-in filters
@@ -252,66 +253,91 @@ class chunk_by_dir(InstanceFilter):
# simplicity.
if self.this_chunk == 1:
disabled_dirs = [v for k, v in tests_by_dir.iteritems()
if k not in ordered_dirs]
for disabled_test in itertools.chain(*disabled_dirs):
yield disabled_test
-class chunk_by_runtime(InstanceFilter):
+class ManifestChunk(InstanceFilter):
+ """
+ Base class for chunking tests by manifest using a numerical key.
+ """
+ __metaclass__ = ABCMeta
+
+ def __init__(self, this_chunk, total_chunks, *args, **kwargs):
+ InstanceFilter.__init__(self, this_chunk, total_chunks, *args, **kwargs)
+ self.this_chunk = this_chunk
+ self.total_chunks = total_chunks
+
+ @abstractmethod
+ def key(self, tests):
+ pass
+
+ def __call__(self, tests, values):
+ tests = list(tests)
+ manifests = set(t['manifest'] for t in tests)
+
+ tests_by_manifest = []
+ for manifest in manifests:
+ mtests = [t for t in tests if t['manifest'] == manifest]
+ tests_by_manifest.append((self.key(mtests), mtests))
+ tests_by_manifest.sort(reverse=True)
+
+ tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
+ for key, batch in tests_by_manifest:
+ # sort first by key, then by number of tests in case of a tie.
+ # This guarantees the chunk with the lowest key will always
+ # get the next batch of tests.
+ tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
+ tests_by_chunk[0][0] += key
+ tests_by_chunk[0][1].extend(batch)
+
+ return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+
+
+class chunk_by_manifest(ManifestChunk):
+ """
+ Chunking algorithm that tries to evenly distribute tests while ensuring
+ tests in the same manifest stay together.
+
+ :param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
+ :param total_chunks: the total number of chunks
+ """
+ def key(self, tests):
+ return len(tests)
+
+
+class chunk_by_runtime(ManifestChunk):
"""
Chunking algorithm that attempts to group tests into chunks based on their
average runtimes. It keeps manifests of tests together and pairs slow
running manifests with fast ones.
:param this_chunk: the current chunk, 1 <= this_chunk <= total_chunks
:param total_chunks: the total number of chunks
:param runtimes: dictionary of test runtime data, of the form
{<test path>: <average runtime>}
:param default_runtime: value in seconds to assign tests that don't exist
in the runtimes file
"""
def __init__(self, this_chunk, total_chunks, runtimes, default_runtime=0):
- InstanceFilter.__init__(self, this_chunk, total_chunks, runtimes,
- default_runtime=default_runtime)
- self.this_chunk = this_chunk
- self.total_chunks = total_chunks
-
+ ManifestChunk.__init__(self, this_chunk, total_chunks, runtimes,
+ default_runtime=default_runtime)
# defaultdict(lambda:<int>) assigns all non-existent keys the value of
# <int>. This means all tests we encounter that don't exist in the
# runtimes file will be assigned `default_runtime`.
self.runtimes = defaultdict(lambda: default_runtime)
self.runtimes.update(runtimes)
- def __call__(self, tests, values):
- tests = list(tests)
- manifests = set(t['manifest'] for t in tests)
-
- def total_runtime(tests):
- return sum(self.runtimes[t['relpath']] for t in tests
- if 'disabled' not in t)
-
- tests_by_manifest = []
- for manifest in manifests:
- mtests = [t for t in tests if t['manifest'] == manifest]
- tests_by_manifest.append((total_runtime(mtests), mtests))
- tests_by_manifest.sort(reverse=True)
-
- tests_by_chunk = [[0, []] for i in range(self.total_chunks)]
- for runtime, batch in tests_by_manifest:
- # sort first by runtime, then by number of tests in case of a tie.
- # This guarantees the chunk with the fastest runtime will always
- # get the next batch of tests.
- tests_by_chunk.sort(key=lambda x: (x[0], len(x[1])))
- tests_by_chunk[0][0] += runtime
- tests_by_chunk[0][1].extend(batch)
-
- return (t for t in tests_by_chunk[self.this_chunk - 1][1])
+ def key(self, tests):
+ return sum(self.runtimes[t['relpath']] for t in tests
+ if 'disabled' not in t)
class tags(InstanceFilter):
"""
Removes tests that don't contain any of the given tags. This overrides
InstanceFilter's __eq__ method, so multiple instances can be added.
Multiple tag filters is equivalent to joining tags with the AND operator.