Bug 1401684 - Add option to disable unstable tests with wpt-update; r?jgraham
Add a --stability option to wpt-update. If a test's results are inconsistent
across many runs, the test is disabled for the condition(s) indicated by
the corresponding log files. The --stability option's value specifies
the reason for disabling the tests (typically a bug number), which is
recorded in the metadata. Disabled test paths are printed to stdout.
MozReview-Commit-ID: DEIqRkN7NzR
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/manifestupdate.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/manifestupdate.py
@@ -24,18 +24,21 @@ at runtime.
When a result for a test is to be updated set_result on the
[Sub]TestNode is called to store the new result, alongside the
existing conditional that result's run info matched, if any. Once all
new results are known, coalesce_expected is called to compute the new
set of results and conditionals. The AST of the underlying parsed manifest
is updated with the changes, and the result is serialised to a file.
"""
+
class ConditionError(Exception):
- pass
+ def __init__(self, cond=None):
+ self.cond = cond
+
Result = namedtuple("Result", ["run_info", "status"])
def data_cls_getter(output_node, visited_node):
# visited_node is intentionally unused
if output_node is None:
return ExpectedManifest
@@ -97,25 +100,27 @@ class ExpectedManifest(ManifestItem):
return test_id in self.child_map
@property
def url(self):
return urlparse.urljoin(self.url_base,
"/".join(self.test_path.split(os.path.sep)))
+
class TestNode(ManifestItem):
def __init__(self, node):
"""Tree node associated with a particular test in a manifest
:param node: AST node associated with the test"""
ManifestItem.__init__(self, node)
self.updated_expected = []
self.new_expected = []
+ self.new_disabled = False
self.subtests = {}
self.default_status = None
self._from_file = True
@classmethod
def create(cls, test_type, test_id):
"""Create a TestNode corresponding to a given test
@@ -177,48 +182,49 @@ class TestNode(ManifestItem):
if result.status != cond.value:
self.root.modified = True
break
else:
# We didn't find a previous value for this
self.new_expected.append(Result(run_info, result.status))
self.root.modified = True
- def coalesce_expected(self):
+ def coalesce_expected(self, stability=None):
"""Update the underlying manifest AST for this test based on all the
added results.
This will update existing conditionals if they got the same result in
all matching runs in the updated results, will delete existing conditionals
that get more than one different result in the updated run, and add new
conditionals for anything that doesn't match an existing conditional.
- Conditionals not matched by any added result are not changed."""
+ Conditionals not matched by any added result are not changed.
- final_conditionals = []
+ When `stability` is not None, disable any test that shows multiple
+ unexpected results for the same set of parameters.
+ """
try:
unconditional_status = self.get("expected")
except KeyError:
unconditional_status = self.default_status
for conditional_value, results in self.updated_expected:
if not results:
# The conditional didn't match anything in these runs so leave it alone
- final_conditionals.append(conditional_value)
+ pass
elif all(results[0].status == result.status for result in results):
# All the new values for this conditional matched, so update the node
result = results[0]
if (result.status == unconditional_status and
conditional_value.condition_node is not None):
if "expected" in self:
self.remove_value("expected", conditional_value)
else:
conditional_value.value = result.status
- final_conditionals.append(conditional_value)
elif conditional_value.condition_node is not None:
# Blow away the existing condition and rebuild from scratch
# This isn't sure to work if we have a conditional later that matches
# these values too, but we can hope, verify that we get the results
# we expect, and if not let a human sort it out
self.remove_value("expected", conditional_value)
self.new_expected.extend(results)
elif conditional_value.condition_node is None:
@@ -229,30 +235,32 @@ class TestNode(ManifestItem):
# condition except for the default condition
if self.new_expected:
if all(self.new_expected[0].status == result.status
for result in self.new_expected) and not self.updated_expected:
status = self.new_expected[0].status
if status != self.default_status:
self.set("expected", status, condition=None)
- final_conditionals.append(self._data["expected"][-1])
else:
try:
conditionals = group_conditionals(
self.new_expected,
property_order=self.root.property_order,
boolean_properties=self.root.boolean_properties)
- except ConditionError:
- print "Conflicting test results for %s, cannot update" % self.root.test_path
+ except ConditionError as e:
+ if stability is not None:
+ self.set("disabled", stability or "unstable", e.cond.children[0])
+ self.new_disabled = True
+ else:
+ print "Conflicting test results for %s, cannot update" % self.root.test_path
return
for conditional_node, status in conditionals:
if status != unconditional_status:
self.set("expected", status, condition=conditional_node.children[0])
- final_conditionals.append(self._data["expected"][-1])
if ("expected" in self._data and
len(self._data["expected"]) > 0 and
self._data["expected"][-1].condition_node is None and
self._data["expected"][-1].value == self.default_status):
self.remove_value("expected", self._data["expected"][-1])
@@ -363,16 +371,19 @@ def group_conditionals(values, property_
if prop in properties:
include_props.append(prop)
conditions = {}
for run_info, status in values:
prop_set = tuple((prop, run_info[prop]) for prop in include_props)
if prop_set in conditions:
+ if conditions[prop_set][1] != status:
+ # A prop_set contains contradictory results
+ raise ConditionError(make_expr(prop_set, status, boolean_properties))
continue
expr = make_expr(prop_set, status, boolean_properties=boolean_properties)
conditions[prop_set] = (expr, status)
return conditions.values()
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/metadata.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/metadata.py
@@ -24,43 +24,55 @@ logger = structuredlog.StructuredLogger(
def load_test_manifests(serve_root, test_paths):
do_delayed_imports(serve_root)
manifest_loader = testloader.ManifestLoader(test_paths, False)
return manifest_loader.load()
def update_expected(test_paths, serve_root, log_file_names,
rev_old=None, rev_new="HEAD", ignore_existing=False,
- sync_root=None, property_order=None, boolean_properties=None):
+ sync_root=None, property_order=None, boolean_properties=None,
+ stability=None):
"""Update the metadata files for web-platform-tests based on
- the results obtained in a previous run"""
+ the results obtained in a previous run or runs
+
+ If stability is not None, assume log_file_names refers to logs from repeated
+ test jobs, disable tests that don't behave as expected on all runs"""
manifests = load_test_manifests(serve_root, test_paths)
change_data = {}
if sync_root is not None:
if rev_old is not None:
rev_old = git("rev-parse", rev_old, repo=sync_root).strip()
rev_new = git("rev-parse", rev_new, repo=sync_root).strip()
if rev_old is not None:
change_data = load_change_data(rev_old, rev_new, repo=sync_root)
-
expected_map_by_manifest = update_from_logs(manifests,
*log_file_names,
ignore_existing=ignore_existing,
property_order=property_order,
- boolean_properties=boolean_properties)
+ boolean_properties=boolean_properties,
+ stability=stability)
for test_manifest, expected_map in expected_map_by_manifest.iteritems():
url_base = manifests[test_manifest]["url_base"]
metadata_path = test_paths[url_base]["metadata_path"]
write_changes(metadata_path, expected_map)
+ if stability is not None:
+ for tree in expected_map.itervalues():
+ for test in tree.iterchildren():
+ for subtest in test.iterchildren():
+ if subtest.new_disabled:
+ print os.path.dirname(subtest.root.test_path) + "/" + subtest.name
+ if test.new_disabled:
+ print test.root.test_path
results_changed = [item.test_path for item in expected_map.itervalues() if item.modified]
return unexpected_changes(manifests, change_data, results_changed)
def do_delayed_imports(serve_root):
global manifest, manifestitem
@@ -116,24 +128,27 @@ def unexpected_changes(manifests, change
# Build a hash of filename: properties
# For each different set of properties, gather all chunks
# For each chunk in the set of chunks, go through all tests
# for each test, make a map of {conditionals: [(platform, new_value)]}
# Repeat for each platform
# For each test in the list of tests:
# for each conditional:
# If all the new values match (or there aren't any) retain that conditional
-# If any new values mismatch mark the test as needing human attention
+# If any new values mismatch:
+# If stability and any repeated values don't match, disable the test
+# else mark the test as needing human attention
# Check if all the RHS values are the same; if so collapse the conditionals
def update_from_logs(manifests, *log_filenames, **kwargs):
ignore_existing = kwargs.get("ignore_existing", False)
property_order = kwargs.get("property_order")
boolean_properties = kwargs.get("boolean_properties")
+ stability = kwargs.get("stability")
expected_map = {}
id_test_map = {}
for test_manifest, paths in manifests.iteritems():
expected_map_manifest, id_path_map_manifest = create_test_tree(
paths["metadata_path"],
test_manifest,
@@ -147,18 +162,18 @@ def update_from_logs(manifests, *log_fil
for log_filename in log_filenames:
with open(log_filename) as f:
updater.update_from_log(f)
for manifest_expected in expected_map.itervalues():
for tree in manifest_expected.itervalues():
for test in tree.iterchildren():
for subtest in test.iterchildren():
- subtest.coalesce_expected()
- test.coalesce_expected()
+ subtest.coalesce_expected(stability=stability)
+ test.coalesce_expected(stability=stability)
return expected_map
def directory_manifests(metadata_path):
rv = []
for dirpath, dirname, filenames in os.walk(metadata_path):
if "__dir__.ini" in filenames:
rel_path = os.path.relpath(dirpath, metadata_path)
@@ -278,17 +293,16 @@ class ExpectedUpdater(object):
test_cls = wpttest.manifest_test_cls[test.test_type]
if data["status"] == "SKIP":
return
result = test_cls.result_cls(
data["status"],
data.get("message"))
-
test.set_result(self.run_info, result)
del self.test_cache[test_id]
def create_test_tree(metadata_path, test_manifest, property_order=None,
boolean_properties=None):
expected_map = {}
id_test_map = {}
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/update/metadata.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/update/metadata.py
@@ -28,17 +28,18 @@ class UpdateExpected(Step):
state.needs_human = metadata.update_expected(state.paths,
state.serve_root,
state.run_log,
rev_old=None,
ignore_existing=state.ignore_existing,
sync_root=sync_root,
property_order=state.property_order,
- boolean_properties=state.boolean_properties)
+ boolean_properties=state.boolean_properties,
+ stability=state.stability)
class CreateMetadataPatch(Step):
"""Create a patch/commit for the metadata checkout"""
def create(self, state):
if not state.patch:
return
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/update/update.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/update/update.py
@@ -85,16 +85,17 @@ class UpdateMetadata(Step):
def create(self, state):
if not state.kwargs["run_log"]:
return
kwargs = state.kwargs
with state.push(["local_tree", "sync_tree", "paths", "serve_root"]):
state.run_log = kwargs["run_log"]
state.ignore_existing = kwargs["ignore_existing"]
+ state.stability = kwargs["stability"]
state.patch = kwargs["patch"]
state.suite_name = kwargs["suite_name"]
state.product = kwargs["product"]
state.config = kwargs["config"]
runner = MetadataUpdateRunner(self.logger, state)
runner.run()
--- a/testing/web-platform/tests/tools/wptrunner/wptrunner/wptcommandline.py
+++ b/testing/web-platform/tests/tools/wptrunner/wptrunner/wptcommandline.py
@@ -451,16 +451,19 @@ def create_parser_update(product_choices
parser.add_argument("--rev", action="store", help="Revision to sync to")
parser.add_argument("--patch", action="store_true", dest="patch", default=None,
help="Create a VCS commit containing the changes.")
parser.add_argument("--no-patch", action="store_false", dest="patch",
help="Don't create a VCS commit containing the changes.")
parser.add_argument("--sync", dest="sync", action="store_true", default=False,
help="Sync the tests with the latest from upstream (implies --patch)")
parser.add_argument("--ignore-existing", action="store_true", help="When updating test results only consider results from the logfiles provided, not existing expectations.")
+ parser.add_argument("--stability", nargs="?", action="store", const="unstable", default=None,
+ help=("Reason for disabling tests. When updating test results, disable tests that have "
+ "inconsistent results across many runs with the given reason."))
parser.add_argument("--continue", action="store_true", help="Continue a previously started run of the update script")
parser.add_argument("--abort", action="store_true", help="Clear state from a previous incomplete run of the update script")
parser.add_argument("--exclude", action="store", nargs="*",
help="List of glob-style paths to exclude when syncing tests")
parser.add_argument("--include", action="store", nargs="*",
help="List of glob-style paths to include which would otherwise be excluded when syncing tests")
# Should make this required iff run=logfile
parser.add_argument("run_log", nargs="*", type=abs_path,