Bug 1391114 - Make `mach artifact toolchain` validate toolchain downloads. r?gps draft
authorMike Hommey <mh+mozilla@glandium.org>
Thu, 17 Aug 2017 09:36:27 +0900
changeset 647818 7689e377f265643d0260eec9c0f53cdeb47e843b
parent 647817 63ca686c3f1e870649b6d9c559973d100573aec2
child 726644 1ce0ab3d084803851eb63e4ea0729210bb4362fd
push id74552
push userbmo:mh+mozilla@glandium.org
push dateThu, 17 Aug 2017 00:46:53 +0000
reviewersgps
bugs1391114
milestone57.0a1
Bug 1391114 - Make `mach artifact toolchain` validate toolchain downloads. r?gps Tooltool manifests contain digests that have been used to validate tooltool downloads. Toolchain artifacts don't benefit from that, and as a result, an incomplete download can be considered as finished, and unpacking fails after that, without retrying, even with --retry. Fortunately, the chain of trust artifacts do contain digests for taskcluster artifacts, as long as the jobs that created the artifacts have chain of trust enabled. As of now, the goal is not cryptographic validation of the download, but to ensure that we got the complete file, and to trigger a new download if we haven't.
python/mozbuild/mozbuild/action/tooltool.py
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/action/tooltool.py
+++ b/python/mozbuild/mozbuild/action/tooltool.py
@@ -136,17 +136,17 @@ class FileRecord(object):
             with open(self.filename, 'rb') as f:
                 return self.digest == digest_file(f, self.algorithm)
         else:
             log.debug(
                 "trying to validate digest on a missing file, %s', self.filename")
             raise MissingFileException(filename=self.filename)
 
     def validate(self):
-        if self.validate_size():
+        if self.size is None or self.validate_size():
             if self.validate_digest():
                 return True
         return False
 
     def describe(self):
         if self.present() and self.validate():
             return "'%s' is present and valid" % self.filename
         elif self.present():
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -1831,16 +1831,42 @@ class PackageFrontend(MachCommandBase):
                 self.filename = cache.fetch(self.url)
                 return self.filename
 
             def validate(self):
                 if self.size is None and self.digest is None:
                     return True
                 return super(DownloadRecord, self).validate()
 
+        class ArtifactRecord(DownloadRecord):
+            def __init__(self, task_id, artifact_name):
+                # Take the artifact's digest, if any, from the chain of trust.
+                cot = cache._download_manager.session.get(
+                    get_artifact_url(task_id, 'public/chainOfTrust.json.asc'))
+                # Defaults when the chain of trust is missing or unparsable.
+                data, digest, algorithm = {}, None, None
+                if cot.status_code == 200:
+                    # The file is GPG-signed, but we don't validate that: we
+                    # only take the one line of JSON, which starts with `{`.
+                    for l in cot.content.splitlines():
+                        if l.startswith('{'):
+                            try:
+                                data = json.loads(l)
+                                break
+                            except ValueError:
+                                pass
+                for algorithm, digest in (data.get('artifacts', {})
+                                              .get(artifact_name, {}).items()):
+                    pass
+
+                name = os.path.basename(artifact_name)
+                super(ArtifactRecord, self).__init__(
+                    get_artifact_url(task_id, artifact_name), name,
+                    None, digest, algorithm, unpack=True)
+
         records = OrderedDict()
         downloaded = []
 
         if tooltool_manifest:
             manifest = open_manifest(tooltool_manifest)
             for record in manifest.file_records:
                 url = '{}/{}/{}'.format(tooltool_url, record.algorithm,
                                         record.digest)
@@ -1899,32 +1925,29 @@ class PackageFrontend(MachCommandBase):
                 task_id = optimize_task(task, {})
                 artifact_name = task.attributes.get('toolchain-artifact')
                 if task_id in (True, False) or not artifact_name:
                     self.log(logging.ERROR, 'artifact', {'build': user_value},
                              'Could not find artifacts for a toolchain build '
                              'named `{build}`')
                     return 1
 
-                name = os.path.basename(artifact_name)
-                records[name] = DownloadRecord(
-                    get_artifact_url(task_id, artifact_name),
-                    name, None, None, None, unpack=True)
+                record = ArtifactRecord(task_id, artifact_name)
+                records[record.filename] = record
 
         # Handle the list of files of the form path@task-id on the command
         # line. Each of those give a path to an artifact to download.
         for f in files:
             if '@' not in f:
                 self.log(logging.ERROR, 'artifact', {},
                          'Expected a list of files of the form path@task-id')
                 return 1
             name, task_id = f.rsplit('@', 1)
-            records[os.path.basename(name)] = DownloadRecord(
-                get_artifact_url(task_id, name), os.path.basename(name),
-                None, None, None, unpack=True)
+            record = ArtifactRecord(task_id, name)
+            records[record.filename] = record
 
         for record in records.itervalues():
             self.log(logging.INFO, 'artifact', {'name': record.basename},
                      'Downloading {name}')
             valid = False
             # sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
             for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
                                                      sleeptime=60)):