Bug 1305877 - Make HashChunker stable; r?jgraham draft
author Gregory Szorc <gps@mozilla.com>
Tue, 27 Sep 2016 16:31:26 -0700
changeset 420703 17d8e22ef616cf7f24a360a56cbe9190e4429bdf
parent 420680 42c95d88aaaa7c2eca1d278399421d437441ac4d
child 420704 c9311e759dd59577a0618539088cecc6f51cfd89
push id 31265
push user gszorc@mozilla.com
push date Tue, 04 Oct 2016 13:32:17 +0000
reviewers jgraham
bugs 1305877
milestone 52.0a1
Bug 1305877 - Make HashChunker stable; r?jgraham

The built-in hash() function uses the backing memory address for hashing. This is essentially random. Switch to md5 so input is consistently hashed across processes.

MozReview-Commit-ID: D52uzttE5hc
testing/web-platform/harness/wptrunner/testloader.py
--- a/testing/web-platform/harness/wptrunner/testloader.py
+++ b/testing/web-platform/harness/wptrunner/testloader.py
@@ -1,11 +1,11 @@
+import hashlib
 import json
 import os
-import sys
 import urlparse
 from abc import ABCMeta, abstractmethod
 from Queue import Empty
 from collections import defaultdict, OrderedDict, deque
 from multiprocessing import Queue
 
 import manifestinclude
 import manifestexpected
@@ -38,22 +38,24 @@ class Unchunked(TestChunker):
         assert self.total_chunks == 1
 
     def __call__(self, manifest):
         for item in manifest:
             yield item
 
 
 class HashChunker(TestChunker):
-    def __call__(self):
+    def __call__(self, manifest):
         chunk_index = self.chunk_number - 1
         for test_path, tests in manifest:
-            if hash(test_path) % self.total_chunks == chunk_index:
+            h = int(hashlib.md5(test_path).hexdigest(), 16)
+            if h % self.total_chunks == chunk_index:
                 yield test_path, tests
 
+
 class EqualTimeChunker(TestChunker):
     def _group_by_directory(self, manifest_items):
         """Split the list of manifest items into a ordered dict that groups tests in
         so that anything in the same subdirectory beyond a depth of 3 is in the same
         group. So all tests in a/b/c, a/b/c/d and a/b/c/e will be grouped together
         and separate to tests in a/b/f
 
         Returns: tuple (ordered dict of {test_dir: PathData}, total estimated runtime)