Bug 1459737 - Move cache configuration to standalone function; r?dustin draft
author: Gregory Szorc <gps@mozilla.com>
Fri, 04 May 2018 17:54:07 -0700
changeset 792203 c03eb6cfdfbe809e96219898a92a793867c45df2
parent 792202 824607b0b36d2899eb1c1f7efd10de64a634ca59
child 792204 4cb05343f49c3877a08252aa0b0fdee4ab14f5a7
push id: 109042
push user: bmo:gps@mozilla.com
push date: Mon, 07 May 2018 21:25:20 +0000
reviewers: dustin
bugs: 1459737
milestone: 62.0a1
Bug 1459737 - Move cache configuration to standalone function; r?dustin

main() is quite long. And the control flow will become more complicated as we support Windows. Let's move the bulk of the cache configuration code into a standalone function so main() is less cluttered.

MozReview-Commit-ID: xredCubr1E
taskcluster/scripts/run-task
--- a/taskcluster/scripts/run-task
+++ b/taskcluster/scripts/run-task
@@ -190,16 +190,149 @@ def chown_recursive(path, user, group, u
             # File may be a symlink that points to nowhere. In which case
             # os.chown() would fail because it attempts to follow the
             # symlink. We only care about directory entries, not what
             # they point to. So setting the owner of the symlink should
             # be sufficient.
             os.lchown(os.path.join(root, f), uid, gid)
 
 
+def configure_cache_posix(cache, user, group,
+                          untrusted_caches, running_as_root):
+    """Configure a cache path on POSIX platforms.
+
+    For each cache, we write out a special file denoting attributes and
+    capabilities of run-task and the task being executed. These attributes
+    are used by subsequent run-task invocations to validate that use of
+    the cache is acceptable.
+
+    We /could/ blow away the cache data on requirements mismatch.
+    While this would be convenient, this could result in "competing" tasks
+    effectively undoing the other's work. This would slow down task
+    execution in aggregate. Without monitoring for this, people may not notice
+    the problem and tasks would be slower than they could be. We follow the
+    principle of "fail fast" to ensure optimal task execution.
+
+    We also write an audit log of who used the caches. This log is printed
+    during failures to help aid debugging.
+    """
+
+    our_requirements = {
+        # Include a version string that we can bump whenever to trigger
+        # fresh caches. The actual value is not relevant and doesn't need
+        # to follow any explicit order. Since taskgraph bakes this file's
+        # hash into cache names, any change to this file/version is sufficient
+        # to force the use of a new cache.
+        b'version=1',
+        # Include the UID and GID the task will run as to ensure that tasks
+        # with different UID and GID don't share the same cache.
+        b'uid=%d' % user.pw_uid,
+        b'gid=%d' % group.gr_gid,
+    }
+
+    requires_path = os.path.join(cache, '.cacherequires')
+    audit_path = os.path.join(cache, '.cachelog')
+
+    # The cache is empty. Configure it.
+    if not os.listdir(cache):
+        print_line(b'cache', b'cache %s is empty; writing requirements: '
+                             b'%s\n' % (
+                   cache, b' '.join(sorted(our_requirements))))
+
+        # We write a requirements file so future invocations know what the
+        # requirements are.
+        with open(requires_path, 'wb') as fh:
+            fh.write(b'\n'.join(sorted(our_requirements)))
+
+        # And make it read-only as a precaution against deletion.
+        os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
+
+        write_audit_entry(audit_path,
+                          'created; requirements: %s' %
+                          ', '.join(sorted(our_requirements)))
+
+        set_dir_permissions(cache, user.pw_uid, group.gr_gid)
+        return
+
+    # The cache has content and we have a requirements file. Validate
+    # requirements alignment.
+    if os.path.exists(requires_path):
+        with open(requires_path, 'rb') as fh:
+            wanted_requirements = set(fh.read().splitlines())
+
+        print_line(b'cache', b'cache %s exists; requirements: %s\n' % (
+            cache, b' '.join(sorted(wanted_requirements))))
+
+        missing = wanted_requirements - our_requirements
+
+        # Allow requirements mismatch for uid/gid if and only if caches
+        # are untrusted. This allows cache behavior on Try to be
+        # reasonable. Otherwise, random tasks could "poison" cache
+        # usability by introducing uid/gid mismatches. For untrusted
+        # environments like Try, this is a perfectly reasonable thing to
+        # allow.
+        if missing and untrusted_caches and running_as_root and \
+                all(s.startswith(('uid=', 'gid=')) for s in missing):
+            print_line(b'cache',
+                       b'cache %s uid/gid mismatch; this is acceptable '
+                       b'because caches for this task are untrusted; '
+                       b'changing ownership to facilitate cache use\n' %
+                       cache)
+            chown_recursive(cache, user.pw_name, group.gr_name, user.pw_uid,
+                            group.gr_gid)
+
+            # And write out the updated reality.
+            with open(requires_path, 'wb') as fh:
+                fh.write(b'\n'.join(sorted(our_requirements)))
+
+            write_audit_entry(audit_path,
+                              'chown; requirements: %s' %
+                              ', '.join(sorted(our_requirements)))
+
+        elif missing:
+            print('error: requirements for populated cache %s differ from '
+                  'this task' % cache)
+            print('cache requirements: %s' % ' '.join(sorted(
+                wanted_requirements)))
+            print('our requirements:   %s' % ' '.join(sorted(
+                our_requirements)))
+            if any(s.startswith(('uid=', 'gid=')) for s in missing):
+                print(CACHE_UID_GID_MISMATCH)
+
+            write_audit_entry(audit_path,
+                              'requirements mismatch; wanted: %s' %
+                              ', '.join(sorted(our_requirements)))
+
+            print('')
+            print('audit log:')
+            with open(audit_path, 'rb') as fh:
+                print(fh.read())
+
+            return True
+        else:
+            write_audit_entry(audit_path, 'used')
+
+        # We don't need to adjust permissions here because the cache is
+        # associated with a uid/gid and the first task should have set
+        # a proper owner/group.
+
+        return
+
+    # The cache has content and no requirements file. This shouldn't
+    # happen because run-task should be the first thing that touches a
+    # cache.
+    print('error: cache %s is not empty and is missing a '
+          '.cacherequires file; the cache names for this task are '
+          'likely mis-configured or TASKCLUSTER_CACHES is not set '
+          'properly' % cache)
+
+    write_audit_entry(audit_path, 'missing .cacherequires')
+    return True
+
+
 def vcs_checkout(source_repo, dest, store_path,
                  base_repo=None, revision=None, branch=None,
                  fetch_hgfingerprint=False, sparse_profile=None):
     # Specify method to checkout a revision. This defaults to revisions as
     # SHA-1 strings, but also supports symbolic revisions like `tip` via the
     # branch flag.
     if revision:
         revision_flag = b'--revision'
@@ -337,158 +470,40 @@ def main(args):
             return 1
     else:
         uid = gid = gids = None
 
     # Validate caches.
     #
     # Taskgraph should pass in a list of paths that are caches via an
     # environment variable (which we don't want to pass down to child
-    # processes). For each cache, we write out a special file denoting
-    # attributes and capabilities of run-task and the task being executed.
-    # These attributes are used by subsequent run-task invocations to
-    # validate that use of the cache is acceptable.
-    #
-    # We /could/ blow away the cache data on requirements mismatch.
-    # While this would be convenient, this could result in "competing" tasks
-    # effectively undoing the other's work. This would slow down task
-    # execution in aggregate. Without monitoring for this, people may not notice
-    # the problem and tasks would be slower than they could be. We follow the
-    # principle of "fail fast" to ensure optimal task execution.
-    #
-    # We also write an audit log of who used the caches. This log is printed
-    # during failures to help aid debugging.
+    # processes).
 
     if 'TASKCLUSTER_CACHES' in os.environ:
         caches = os.environ['TASKCLUSTER_CACHES'].split(';')
         del os.environ['TASKCLUSTER_CACHES']
     else:
         caches = []
 
     if 'TASKCLUSTER_UNTRUSTED_CACHES' in os.environ:
         untrusted_caches = True
         del os.environ['TASKCLUSTER_UNTRUSTED_CACHES']
     else:
         untrusted_caches = False
 
-    our_requirements = {
-        # Include a version string that we can bump whenever to trigger
-        # fresh caches. The actual value is not relevant and doesn't need
-        # to follow any explicit order. Since taskgraph bakes this file's
-        # hash into cache names, any change to this file/version is sufficient
-        # to force the use of a new cache.
-        b'version=1',
-    }
-
-    # Include the UID and GID the task will run as to ensure that tasks
-    # with different UID and GID don't share the same cache.
-    if uid is not None:
-        our_requirements.add(b'uid=%d' % uid)
-    if gid is not None:
-        our_requirements.add(b'gid=%d' % gid)
-
     for cache in caches:
         if not os.path.isdir(cache):
             print('error: cache %s is not a directory; this should never '
                   'happen' % cache)
             return 1
 
-        requires_path = os.path.join(cache, '.cacherequires')
-        audit_path = os.path.join(cache, '.cachelog')
-
-        # The cache is empty. Configure it.
-        if not os.listdir(cache):
-            print_line(b'cache', b'cache %s is empty; writing requirements: '
-                       b'%s\n' % (cache, b' '.join(sorted(our_requirements))))
-
-            # We write a requirements file so future invocations know what the
-            # requirements are.
-            with open(requires_path, 'wb') as fh:
-                fh.write(b'\n'.join(sorted(our_requirements)))
-
-            # And make it read-only as a precaution against deletion.
-            os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
-
-            write_audit_entry(audit_path,
-                              'created; requirements: %s' %
-                              ', '.join(sorted(our_requirements)))
-
-            set_dir_permissions(cache, uid, gid)
-
-        # The cache has content and we have a requirements file. Validate
-        # requirements alignment.
-        elif os.path.exists(requires_path):
-            with open(requires_path, 'rb') as fh:
-                wanted_requirements = set(fh.read().splitlines())
-
-            print_line(b'cache', b'cache %s exists; requirements: %s\n' % (
-                       cache, b' '.join(sorted(wanted_requirements))))
-
-            missing = wanted_requirements - our_requirements
+        purge = configure_cache_posix(cache, user, group, untrusted_caches,
+                                      running_as_root)
 
-            # Allow requirements mismatch for uid/gid if and only if caches
-            # are untrusted. This allows cache behavior on Try to be
-            # reasonable. Otherwise, random tasks could "poison" cache
-            # usability by introducing uid/gid mismatches. For untrusted
-            # environments like Try, this is a perfectly reasonable thing to
-            # allow.
-            if missing and untrusted_caches and running_as_root and \
-                    all(s.startswith(('uid=', 'gid=')) for s in missing):
-                print_line(b'cache',
-                           b'cache %s uid/gid mismatch; this is acceptable '
-                           b'because caches for this task are untrusted; '
-                           b'changing ownership to facilitate cache use\n' %
-                           cache)
-                chown_recursive(cache, user.pw_name, group.gr_name, uid, gid)
-
-                # And write out the updated reality.
-                with open(requires_path, 'wb') as fh:
-                    fh.write(b'\n'.join(sorted(our_requirements)))
-
-                write_audit_entry(audit_path,
-                                  'chown; requirements: %s' %
-                                  ', '.join(sorted(our_requirements)))
-
-            elif missing:
-                print('error: requirements for populated cache %s differ from '
-                      'this task' % cache)
-                print('cache requirements: %s' % ' '.join(sorted(
-                    wanted_requirements)))
-                print('our requirements:   %s' % ' '.join(sorted(
-                    our_requirements)))
-                if any(s.startswith(('uid=', 'gid=')) for s in missing):
-                    print(CACHE_UID_GID_MISMATCH)
-
-                write_audit_entry(audit_path,
-                                  'requirements mismatch; wanted: %s' %
-                                  ', '.join(sorted(our_requirements)))
-
-                print('')
-                print('audit log:')
-                with open(audit_path, 'rb') as fh:
-                    print(fh.read())
-
-                return EXIT_PURGE_CACHE
-            else:
-                write_audit_entry(audit_path, 'used')
-
-            # We don't need to adjust permissions here because the cache is
-            # associated with a uid/gid and the first task should have set
-            # a proper owner/group.
-
-        # The cache has content and no requirements file. This shouldn't
-        # happen because run-task should be the first thing that touches a
-        # cache.
-        else:
-            print('error: cache %s is not empty and is missing a '
-                  '.cacherequires file; the cache names for this task are '
-                  'likely mis-configured or TASKCLUSTER_CACHES is not set '
-                  'properly' % cache)
-
-            write_audit_entry(audit_path, 'missing .cacherequires')
+        if purge:
             return EXIT_PURGE_CACHE
 
     if 'TASKCLUSTER_VOLUMES' in os.environ:
         volumes = os.environ['TASKCLUSTER_VOLUMES'].split(';')
         del os.environ['TASKCLUSTER_VOLUMES']
     else:
         volumes = []