Bug 1401684 - Add option to disable unstable tests with wpt-update; r?jgraham draft
author Maja Frydrychowicz <mjzffr@gmail.com>
Wed, 20 Sep 2017 15:26:47 -0400
changeset 669054 62bbf8e2ca7749bafc55f3a8411cf2a88260cfd9
parent 669023 2cd3752963fc8f24f7c202687eab55e83222f608
child 732847 4193dfc2406a2bebd9b1e2c404d6d0442e397c15
push id 81204
push user bmo:mjzffr@gmail.com
push date Fri, 22 Sep 2017 13:21:27 +0000
reviewers jgraham
bugs 1401684
milestone 58.0a1
Bug 1401684 - Add option to disable unstable tests with wpt-update; r?jgraham

Add a --stability option to wpt-update. If test results are inconsistent across many runs, the test is disabled for the condition(s) indicated by the corresponding log files. The --stability option specifies a reason for disabling the tests (typically a bug number), which is recorded in metadata. Disabled test paths are printed to stdout.

MozReview-Commit-ID: DEIqRkN7NzR
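For context, the new flag relies on the argparse pattern where nargs="?" plus a const lets the option be passed bare (yielding the reason "unstable") or with an explicit reason string. A minimal standalone sketch of that behaviour; the parser below is illustrative, not the real wptrunner one:

    import argparse

    # Illustrative parser mirroring the --stability option added to
    # create_parser_update in wptcommandline.py below.
    parser = argparse.ArgumentParser()
    parser.add_argument("--stability", nargs="?", action="store",
                        const="unstable", default=None,
                        help="Reason for disabling unstable tests")

    assert parser.parse_args([]).stability is None                      # feature off
    assert parser.parse_args(["--stability"]).stability == "unstable"   # bare flag -> const
    assert parser.parse_args(["--stability=Bug 1401684"]).stability == "Bug 1401684"

Note that with nargs="?" a bare --stability placed immediately before the positional log files would consume the first log name as its value, so the --stability=reason spelling is the safer one on the command line.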
testing/web-platform/tests/tools/wptrunner/wptrunner/manifestupdate.py
testing/web-platform/tests/tools/wptrunner/wptrunner/metadata.py
testing/web-platform/tests/tools/wptrunner/wptrunner/update/metadata.py
testing/web-platform/tests/tools/wptrunner/wptrunner/update/update.py
testing/web-platform/tests/tools/wptrunner/wptrunner/wptcommandline.py
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/manifestupdate.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/manifestupdate.py
@@ -24,18 +24,21 @@ at runtime.
 When a result for a test is to be updated set_result on the
 [Sub]TestNode is called to store the new result, alongside the
 existing conditional that result's run info matched, if any. Once all
 new results are known, coalesce_expected is called to compute the new
 set of results and conditionals. The AST of the underlying parsed manifest
 is updated with the changes, and the result is serialised to a file.
 """
 
+
 class ConditionError(Exception):
-    pass
+    def __init__(self, cond=None):
+        self.cond = cond
+
 
 Result = namedtuple("Result", ["run_info", "status"])
 
 
 def data_cls_getter(output_node, visited_node):
     # visited_node is intentionally unused
     if output_node is None:
         return ExpectedManifest
@@ -97,25 +100,27 @@ class ExpectedManifest(ManifestItem):
 
         return test_id in self.child_map
 
     @property
     def url(self):
         return urlparse.urljoin(self.url_base,
                                 "/".join(self.test_path.split(os.path.sep)))
 
+
 class TestNode(ManifestItem):
     def __init__(self, node):
         """Tree node associated with a particular test in a manifest
 
         :param node: AST node associated with the test"""
 
         ManifestItem.__init__(self, node)
         self.updated_expected = []
         self.new_expected = []
+        self.new_disabled = False
         self.subtests = {}
         self.default_status = None
         self._from_file = True
 
     @classmethod
     def create(cls, test_type, test_id):
         """Create a TestNode corresponding to a given test
 
@@ -177,48 +182,49 @@ class TestNode(ManifestItem):
                 if result.status != cond.value:
                     self.root.modified = True
                 break
         else:
             # We didn't find a previous value for this
             self.new_expected.append(Result(run_info, result.status))
             self.root.modified = True
 
-    def coalesce_expected(self):
+    def coalesce_expected(self, stability=None):
         """Update the underlying manifest AST for this test based on all the
         added results.
 
         This will update existing conditionals if they got the same result in
         all matching runs in the updated results, will delete existing conditionals
         that get more than one different result in the updated run, and add new
         conditionals for anything that doesn't match an existing conditional.
 
-        Conditionals not matched by any added result are not changed."""
+        Conditionals not matched by any added result are not changed.
 
-        final_conditionals = []
+        When `stability` is not None, disable any test that shows multiple
+        unexpected results for the same set of parameters.
+        """
 
         try:
             unconditional_status = self.get("expected")
         except KeyError:
             unconditional_status = self.default_status
 
         for conditional_value, results in self.updated_expected:
             if not results:
                 # The conditional didn't match anything in these runs so leave it alone
-                final_conditionals.append(conditional_value)
+                pass
             elif all(results[0].status == result.status for result in results):
                 # All the new values for this conditional matched, so update the node
                 result = results[0]
                 if (result.status == unconditional_status and
                     conditional_value.condition_node is not None):
                     if "expected" in self:
                         self.remove_value("expected", conditional_value)
                 else:
                     conditional_value.value = result.status
-                    final_conditionals.append(conditional_value)
             elif conditional_value.condition_node is not None:
                 # Blow away the existing condition and rebuild from scratch
                 # This isn't sure to work if we have a conditional later that matches
                 # these values too, but we can hope, verify that we get the results
                 # we expect, and if not let a human sort it out
                 self.remove_value("expected", conditional_value)
                 self.new_expected.extend(results)
             elif conditional_value.condition_node is None:
@@ -229,30 +235,32 @@ class TestNode(ManifestItem):
         # condition except for the default condition
 
         if self.new_expected:
             if all(self.new_expected[0].status == result.status
                    for result in self.new_expected) and not self.updated_expected:
                 status = self.new_expected[0].status
                 if status != self.default_status:
                     self.set("expected", status, condition=None)
-                    final_conditionals.append(self._data["expected"][-1])
             else:
                 try:
                     conditionals = group_conditionals(
                         self.new_expected,
                         property_order=self.root.property_order,
                         boolean_properties=self.root.boolean_properties)
-                except ConditionError:
-                    print "Conflicting test results for %s, cannot update" % self.root.test_path
+                except ConditionError as e:
+                    if stability is not None:
+                        self.set("disabled", stability or "unstable", e.cond.children[0])
+                        self.new_disabled = True
+                    else:
+                        print "Conflicting test results for %s, cannot update" % self.root.test_path
                     return
                 for conditional_node, status in conditionals:
                     if status != unconditional_status:
                         self.set("expected", status, condition=conditional_node.children[0])
-                        final_conditionals.append(self._data["expected"][-1])
 
         if ("expected" in self._data and
             len(self._data["expected"]) > 0 and
             self._data["expected"][-1].condition_node is None and
             self._data["expected"][-1].value == self.default_status):
 
             self.remove_value("expected", self._data["expected"][-1])
 
@@ -363,16 +371,19 @@ def group_conditionals(values, property_
         if prop in properties:
             include_props.append(prop)
 
     conditions = {}
 
     for run_info, status in values:
         prop_set = tuple((prop, run_info[prop]) for prop in include_props)
         if prop_set in conditions:
+            if conditions[prop_set][1] != status:
+                # The same property set gave contradictory results across runs
+                raise ConditionError(make_expr(prop_set, status, boolean_properties))
             continue
 
         expr = make_expr(prop_set, status, boolean_properties=boolean_properties)
         conditions[prop_set] = (expr, status)
 
     return conditions.values()
 
 
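The heart of the detection is the new check in group_conditionals above: when the same combination of run-info properties is seen with two different statuses, no conditional expression can describe the results, so ConditionError is raised and, when --stability is given, coalesce_expected disables the test instead of asking for human attention. A minimal self-contained sketch of that idea, with hand-rolled stand-ins for wptrunner's types (group_by_props is a hypothetical name):

    from collections import namedtuple

    Result = namedtuple("Result", ["run_info", "status"])

    class ConditionError(Exception):
        def __init__(self, cond=None):
            self.cond = cond  # the property set that produced conflicting statuses

    def group_by_props(results, include_props):
        # Map each distinct property combination to one status; two
        # different statuses for the same combination means instability.
        conditions = {}
        for run_info, status in results:
            prop_set = tuple((prop, run_info[prop]) for prop in include_props)
            if prop_set in conditions:
                if conditions[prop_set] != status:
                    raise ConditionError(prop_set)
                continue
            conditions[prop_set] = status
        return conditions

    results = [Result({"os": "linux", "debug": True}, "FAIL"),
               Result({"os": "linux", "debug": True}, "PASS")]  # flaky on linux-debug
    try:
        group_by_props(results, ["os", "debug"])
    except ConditionError as e:
        print("would disable for %r" % (e.cond,))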
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/metadata.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/metadata.py
@@ -24,43 +24,55 @@ logger = structuredlog.StructuredLogger(
 def load_test_manifests(serve_root, test_paths):
     do_delayed_imports(serve_root)
     manifest_loader = testloader.ManifestLoader(test_paths, False)
     return manifest_loader.load()
 
 
 def update_expected(test_paths, serve_root, log_file_names,
                     rev_old=None, rev_new="HEAD", ignore_existing=False,
-                    sync_root=None, property_order=None, boolean_properties=None):
+                    sync_root=None, property_order=None, boolean_properties=None,
+                    stability=None):
     """Update the metadata files for web-platform-tests based on
-    the results obtained in a previous run"""
+    the results obtained in a previous run or runs
+
+    If stability is not None, assume log_file_names refers to logs from repeated
+    test jobs, and disable tests that don't behave consistently across all runs"""
 
     manifests = load_test_manifests(serve_root, test_paths)
 
     change_data = {}
 
     if sync_root is not None:
         if rev_old is not None:
             rev_old = git("rev-parse", rev_old, repo=sync_root).strip()
         rev_new = git("rev-parse", rev_new, repo=sync_root).strip()
 
         if rev_old is not None:
             change_data = load_change_data(rev_old, rev_new, repo=sync_root)
 
-
     expected_map_by_manifest = update_from_logs(manifests,
                                                 *log_file_names,
                                                 ignore_existing=ignore_existing,
                                                 property_order=property_order,
-                                                boolean_properties=boolean_properties)
+                                                boolean_properties=boolean_properties,
+                                                stability=stability)
 
     for test_manifest, expected_map in expected_map_by_manifest.iteritems():
         url_base = manifests[test_manifest]["url_base"]
         metadata_path = test_paths[url_base]["metadata_path"]
         write_changes(metadata_path, expected_map)
+        if stability is not None:
+            for tree in expected_map.itervalues():
+                for test in tree.iterchildren():
+                    for subtest in test.iterchildren():
+                        if subtest.new_disabled:
+                            print os.path.dirname(subtest.root.test_path) + "/" + subtest.name
+                    if test.new_disabled:
+                        print test.root.test_path
 
     results_changed = [item.test_path for item in expected_map.itervalues() if item.modified]
 
     return unexpected_changes(manifests, change_data, results_changed)
 
 
 def do_delayed_imports(serve_root):
     global manifest, manifestitem
@@ -116,24 +128,27 @@ def unexpected_changes(manifests, change
 # Build a hash of filename: properties
 # For each different set of properties, gather all chunks
 # For each chunk in the set of chunks, go through all tests
 # for each test, make a map of {conditionals: [(platform, new_value)]}
 # Repeat for each platform
 # For each test in the list of tests:
 #   for each conditional:
 #      If all the new values match (or there aren't any) retain that conditional
-#      If any new values mismatch mark the test as needing human attention
+#      If any new values mismatch:
+#           If stability is set and repeated runs disagree, disable the test
+#           else mark the test as needing human attention
 #   Check if all the RHS values are the same; if so collapse the conditionals
 
 
 def update_from_logs(manifests, *log_filenames, **kwargs):
     ignore_existing = kwargs.get("ignore_existing", False)
     property_order = kwargs.get("property_order")
     boolean_properties = kwargs.get("boolean_properties")
+    stability = kwargs.get("stability")
 
     expected_map = {}
     id_test_map = {}
 
     for test_manifest, paths in manifests.iteritems():
         expected_map_manifest, id_path_map_manifest = create_test_tree(
             paths["metadata_path"],
             test_manifest,
@@ -147,18 +162,18 @@ def update_from_logs(manifests, *log_fil
     for log_filename in log_filenames:
         with open(log_filename) as f:
             updater.update_from_log(f)
 
     for manifest_expected in expected_map.itervalues():
         for tree in manifest_expected.itervalues():
             for test in tree.iterchildren():
                 for subtest in test.iterchildren():
-                    subtest.coalesce_expected()
-                test.coalesce_expected()
+                    subtest.coalesce_expected(stability=stability)
+                test.coalesce_expected(stability=stability)
 
     return expected_map
 
 def directory_manifests(metadata_path):
     rv = []
     for dirpath, dirname, filenames in os.walk(metadata_path):
         if "__dir__.ini" in filenames:
             rel_path = os.path.relpath(dirpath, metadata_path)
@@ -278,17 +293,16 @@ class ExpectedUpdater(object):
         test_cls = wpttest.manifest_test_cls[test.test_type]
 
         if data["status"] == "SKIP":
             return
 
         result = test_cls.result_cls(
             data["status"],
             data.get("message"))
-
         test.set_result(self.run_info, result)
         del self.test_cache[test_id]
 
 
 def create_test_tree(metadata_path, test_manifest, property_order=None,
                      boolean_properties=None):
     expected_map = {}
     id_test_map = {}
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/update/metadata.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/update/metadata.py
@@ -28,17 +28,18 @@ class UpdateExpected(Step):
 
         state.needs_human = metadata.update_expected(state.paths,
                                                      state.serve_root,
                                                      state.run_log,
                                                      rev_old=None,
                                                      ignore_existing=state.ignore_existing,
                                                      sync_root=sync_root,
                                                      property_order=state.property_order,
-                                                     boolean_properties=state.boolean_properties)
+                                                     boolean_properties=state.boolean_properties,
+                                                     stability=state.stability)
 
 
 class CreateMetadataPatch(Step):
     """Create a patch/commit for the metadata checkout"""
 
     def create(self, state):
         if not state.patch:
             return
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/update/update.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/update/update.py
@@ -85,16 +85,17 @@ class UpdateMetadata(Step):
     def create(self, state):
         if not state.kwargs["run_log"]:
             return
 
         kwargs = state.kwargs
         with state.push(["local_tree", "sync_tree", "paths", "serve_root"]):
             state.run_log = kwargs["run_log"]
             state.ignore_existing = kwargs["ignore_existing"]
+            state.stability = kwargs["stability"]
             state.patch = kwargs["patch"]
             state.suite_name = kwargs["suite_name"]
             state.product = kwargs["product"]
             state.config = kwargs["config"]
             runner = MetadataUpdateRunner(self.logger, state)
             runner.run()
 
 
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/wptcommandline.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/wptcommandline.py
@@ -451,16 +451,19 @@ def create_parser_update(product_choices
     parser.add_argument("--rev", action="store", help="Revision to sync to")
     parser.add_argument("--patch", action="store_true", dest="patch", default=None,
                         help="Create a VCS commit containing the changes.")
     parser.add_argument("--no-patch", action="store_false", dest="patch",
                         help="Don't create a VCS commit containing the changes.")
     parser.add_argument("--sync", dest="sync", action="store_true", default=False,
                         help="Sync the tests with the latest from upstream (implies --patch)")
     parser.add_argument("--ignore-existing", action="store_true", help="When updating test results only consider results from the logfiles provided, not existing expectations.")
+    parser.add_argument("--stability", nargs="?", action="store", const="unstable", default=None,
+        help=("Reason for disabling tests. When updating test results, disable tests that have "
+              "inconsistent results across many runs with the given reason."))
     parser.add_argument("--continue", action="store_true", help="Continue a previously started run of the update script")
     parser.add_argument("--abort", action="store_true", help="Clear state from a previous incomplete run of the update script")
     parser.add_argument("--exclude", action="store", nargs="*",
                         help="List of glob-style paths to exclude when syncing tests")
     parser.add_argument("--include", action="store", nargs="*",
                         help="List of glob-style paths to include which would otherwise be excluded when syncing tests")
     # Should make this required iff run=logfile
     parser.add_argument("run_log", nargs="*", type=abs_path,