Bug 1305877 - Make HashChunker stable; r?jgraham
The built-in hash() function uses the backing memory address for
hashing, so its result is essentially random from one process to the
next. Switch to md5 so that input is hashed consistently across processes.
MozReview-Commit-ID: D52uzttE5hc
--- a/testing/web-platform/harness/wptrunner/testloader.py
+++ b/testing/web-platform/harness/wptrunner/testloader.py
@@ -1,11 +1,11 @@
+import hashlib
import json
import os
-import sys
import urlparse
from abc import ABCMeta, abstractmethod
from Queue import Empty
from collections import defaultdict, OrderedDict, deque
from multiprocessing import Queue
import manifestinclude
import manifestexpected
@@ -38,22 +38,24 @@ class Unchunked(TestChunker):
assert self.total_chunks == 1
def __call__(self, manifest):
for item in manifest:
yield item
class HashChunker(TestChunker):
- def __call__(self):
+ def __call__(self, manifest):
chunk_index = self.chunk_number - 1
for test_path, tests in manifest:
- if hash(test_path) % self.total_chunks == chunk_index:
+ h = int(hashlib.md5(test_path).hexdigest(), 16)
+ if h % self.total_chunks == chunk_index:
yield test_path, tests
+
class EqualTimeChunker(TestChunker):
def _group_by_directory(self, manifest_items):
"""Split the list of manifest items into a ordered dict that groups tests in
so that anything in the same subdirectory beyond a depth of 3 is in the same
group. So all tests in a/b/c, a/b/c/d and a/b/c/e will be grouped together
and separate to tests in a/b/f
Returns: tuple (ordered dict of {test_dir: PathData}, total estimated runtime)