Bug 1460470 - More run-task Python 3 porting; r=mshal
☠☠ backed out by 278ac3ea0ce5 ☠ ☠
author Gregory Szorc <gps@mozilla.com>
Wed, 09 May 2018 21:15:36 -0700
changeset 796061 4902cab3ce5dab2d1756cf0cd5c95f40603c0a0e
parent 796060 19fe5702cf6d018b743108b35e86d1750f205a76
child 796062 33fe8423f88cb8158559bc736d89b4849b57d315
push id 110148
push user bmo:gps@mozilla.com
push date Wed, 16 May 2018 22:07:12 +0000
reviewers mshal
bugs 1460470
milestone 62.0a1
Bug 1460470 - More run-task Python 3 porting; r=mshal Mostly normalization of str and bytes. Python 3 is annoying for systems level code where most things are bytes. MozReview-Commit-ID: KpvZGegBkYn
taskcluster/scripts/run-task
--- a/taskcluster/scripts/run-task
+++ b/taskcluster/scripts/run-task
@@ -86,18 +86,20 @@ EXIT_PURGE_CACHE = 72
 
 
 IS_POSIX = os.name == 'posix'
 IS_WINDOWS = os.name == 'nt'
 
 
 if PY3:
     bytestr = lambda x: x.encode('utf-8', 'strict')
+    sysstr = lambda x: x.decode('utf-8', 'strict')
 else:
     bytestr = bytes
+    sysstr = str
 
 
 def print_line(prefix, m):
     now = bytestr(datetime.datetime.utcnow().isoformat())
     # slice microseconds to 3 decimals.
     now = now[:-3] if now[-7:-6] == b'.' else now
     sys.stdout.buffer.write(b'[%s %sZ] %s' % (prefix, now, m))
     sys.stdout.buffer.flush()
@@ -185,20 +187,20 @@ def get_posix_user_group(user, group):
 
     # Find all groups to which this user is a member.
     gids = [g.gr_gid for g in grp.getgrall() if group in g.gr_mem]
 
     return user_record, group_record, gids
 
 
 def write_audit_entry(path, msg):
-    now = datetime.datetime.utcnow().isoformat()
+    now = bytestr(datetime.datetime.utcnow().isoformat())
     with open(path, 'ab') as fh:
         fh.write(b'[%sZ %s] %s\n' % (
-                 now, os.environ.get('TASK_ID', 'UNKNOWN'), msg))
+                 now, bytestr(os.environ.get('TASK_ID', 'UNKNOWN')), msg))
 
 
 WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR
 
 
 def set_dir_permissions(path, uid, gid):
     st = os.lstat(path)
 
@@ -209,17 +211,17 @@ def set_dir_permissions(path, uid, gid):
     # them.
     if st.st_mode & WANTED_DIR_MODE != WANTED_DIR_MODE:
         os.chmod(path, st.st_mode | WANTED_DIR_MODE)
 
 
 def chown_recursive(path, user, group, uid, gid):
     print_line(b'chown',
                b'recursively changing ownership of %s to %s:%s\n' %
-               (path, user, group))
+               (bytestr(path), bytestr(user), bytestr(group)))
 
     set_dir_permissions(path, uid, gid)
 
     for root, dirs, files in os.walk(path):
         for d in dirs:
             set_dir_permissions(os.path.join(root, d), uid, gid)
 
         for f in files:
@@ -266,106 +268,106 @@ def configure_cache_posix(cache, user, g
 
     requires_path = os.path.join(cache, '.cacherequires')
     audit_path = os.path.join(cache, '.cachelog')
 
     # The cache is empty. Configure it.
     if not os.listdir(cache):
         print_line(b'cache', b'cache %s is empty; writing requirements: '
                              b'%s\n' % (
-                   cache, b' '.join(sorted(our_requirements))))
+                   bytestr(cache), b' '.join(sorted(our_requirements))))
 
         # We write a requirements file so future invocations know what the
         # requirements are.
         with open(requires_path, 'wb') as fh:
             fh.write(b'\n'.join(sorted(our_requirements)))
 
         # And make it read-only as a precaution against deletion.
         os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
 
         write_audit_entry(audit_path,
-                          'created; requirements: %s' %
-                          ', '.join(sorted(our_requirements)))
+                          b'created; requirements: %s' %
+                          b', '.join(sorted(our_requirements)))
 
         set_dir_permissions(cache, user.pw_uid, group.gr_gid)
         return
 
     # The cache has content and we have a requirements file. Validate
     # requirements alignment.
     if os.path.exists(requires_path):
         with open(requires_path, 'rb') as fh:
             wanted_requirements = set(fh.read().splitlines())
 
         print_line(b'cache', b'cache %s exists; requirements: %s\n' % (
-            cache, b' '.join(sorted(wanted_requirements))))
+            bytestr(cache), b' '.join(sorted(wanted_requirements))))
 
         missing = wanted_requirements - our_requirements
 
         # Allow requirements mismatch for uid/gid if and only if caches
         # are untrusted. This allows cache behavior on Try to be
         # reasonable. Otherwise, random tasks could "poison" cache
         # usability by introducing uid/gid mismatches. For untrusted
         # environments like Try, this is a perfectly reasonable thing to
         # allow.
         if missing and untrusted_caches and running_as_root and \
-                all(s.startswith(('uid=', 'gid=')) for s in missing):
+                all(s.startswith((b'uid=', b'gid=')) for s in missing):
             print_line(b'cache',
                        b'cache %s uid/gid mismatch; this is acceptable '
                        b'because caches for this task are untrusted; '
                        b'changing ownership to facilitate cache use\n' %
-                       cache)
+                       bytestr(cache))
             chown_recursive(cache, user.pw_name, group.gr_name, user.pw_uid,
                             group.gr_gid)
 
             # And write out the updated reality.
             with open(requires_path, 'wb') as fh:
                 fh.write(b'\n'.join(sorted(our_requirements)))
 
             write_audit_entry(audit_path,
-                              'chown; requirements: %s' %
-                              ', '.join(sorted(our_requirements)))
+                              b'chown; requirements: %s' %
+                              b', '.join(sorted(our_requirements)))
 
         elif missing:
             print('error: requirements for populated cache %s differ from '
                   'this task' % cache)
             print('cache requirements: %s' % ' '.join(sorted(
-                wanted_requirements)))
+                map(sysstr, wanted_requirements))))
             print('our requirements:   %s' % ' '.join(sorted(
-                our_requirements)))
-            if any(s.startswith(('uid=', 'gid=')) for s in missing):
+                map(sysstr, our_requirements))))
+            if any(s.startswith((b'uid=', b'gid=')) for s in missing):
                 print(CACHE_UID_GID_MISMATCH)
 
             write_audit_entry(audit_path,
-                              'requirements mismatch; wanted: %s' %
-                              ', '.join(sorted(our_requirements)))
+                              b'requirements mismatch; wanted: %s' %
+                              b', '.join(sorted(our_requirements)))
 
             print('')
             print('audit log:')
-            with open(audit_path, 'rb') as fh:
+            with open(audit_path, 'r') as fh:
                 print(fh.read())
 
             return True
         else:
-            write_audit_entry(audit_path, 'used')
+            write_audit_entry(audit_path, b'used')
 
         # We don't need to adjust permissions here because the cache is
         # associated with a uid/gid and the first task should have set
         # a proper owner/group.
 
         return
 
     # The cache has content and no requirements file. This shouldn't
     # happen because run-task should be the first thing that touches a
     # cache.
     print('error: cache %s is not empty and is missing a '
           '.cacherequires file; the cache names for this task are '
           'likely mis-configured or TASKCLUSTER_CACHES is not set '
           'properly' % cache)
 
-    write_audit_entry(audit_path, 'missing .cacherequires')
+    write_audit_entry(audit_path, b'missing .cacherequires')
     return True
 
 
 def configure_volume_posix(volume, user, group, running_as_root):
     # The only time we should see files in the volume is if the Docker
     # image build put files there.
     #
     # For the sake of simplicity, our policy is that volumes should be
@@ -380,110 +382,112 @@ def configure_volume_posix(volume, user,
               ' '.join(sorted(volume_files)))
         sys.exit(1)
 
     # The volume is almost certainly owned by root:root. Chown it so it
     # is writable.
 
     if running_as_root:
         print_line(b'volume', b'changing ownership of volume %s '
-                              b'to %d:%d\n' % (volume, user.pw_uid,
+                              b'to %d:%d\n' % (bytestr(volume),
+                                               user.pw_uid,
                                                group.gr_gid))
         set_dir_permissions(volume, user.pw_uid, group.gr_gid)
 
 
 def vcs_checkout(source_repo, dest, store_path,
                  base_repo=None, revision=None, branch=None,
                  fetch_hgfingerprint=False, sparse_profile=None):
     # Specify method to checkout a revision. This defaults to revisions as
     # SHA-1 strings, but also supports symbolic revisions like `tip` via the
     # branch flag.
     if revision:
-        revision_flag = b'--revision'
+        revision_flag = '--revision'
         revision_value = revision
     elif branch:
-        revision_flag = b'--branch'
+        revision_flag = '--branch'
         revision_value = branch
     else:
         print('revision is not specified for checkout')
         sys.exit(1)
 
     if IS_POSIX:
-        hg_bin = b'hg'
+        hg_bin = 'hg'
     elif IS_WINDOWS:
         # This is where OCC installs it in the AMIs.
-        hg_bin = br'C:\Program Files\Mercurial\hg.exe'
+        hg_bin = r'C:\Program Files\Mercurial\hg.exe'
         if not os.path.exists(hg_bin):
             print('could not find Mercurial executable: %s' % hg_bin)
             sys.exit(1)
 
     args = [
         hg_bin,
-        b'robustcheckout',
-        b'--sharebase', store_path,
-        b'--purge',
+        'robustcheckout',
+        '--sharebase', store_path,
+        '--purge',
     ]
 
     # Obtain certificate fingerprints.  Without this, the checkout will use the fingerprint
     # on the system, which is managed some other way (such as puppet)
     if fetch_hgfingerprint:
         try:
-            print_line(b'vcs', 'fetching hg.mozilla.org fingerprint from %s\n' %
-                       FINGERPRINT_URL)
+            print_line(b'vcs', b'fetching hg.mozilla.org fingerprint from %s\n' %
+                       bytestr(FINGERPRINT_URL))
             res = urlopen(FINGERPRINT_URL, timeout=10)
             secret = res.read()
             try:
                 secret = json.loads(secret, encoding='utf-8')
             except ValueError:
-                print_line(b'vcs', 'invalid JSON in hg fingerprint secret')
+                print_line(b'vcs', b'invalid JSON in hg fingerprint secret')
                 sys.exit(1)
         except (URLError, socket.timeout):
-            print_line(b'vcs', 'Unable to retrieve current hg.mozilla.org fingerprint'
-                               'using the secret service, using fallback instead.')
+            print_line(b'vcs', b'Unable to retrieve current hg.mozilla.org fingerprint'
+                               b'using the secret service, using fallback instead.')
             # XXX This fingerprint will not be accurate if running on an old
             #     revision after the server fingerprint has changed.
             secret = {'secret': FALLBACK_FINGERPRINT}
 
-        hgmo_fingerprint = secret['secret']['fingerprints'].encode('ascii')
+        hgmo_fingerprint = secret['secret']['fingerprints']
         args.extend([
-            b'--config', b'hostsecurity.hg.mozilla.org:fingerprints=%s' % hgmo_fingerprint,
+            '--config', 'hostsecurity.hg.mozilla.org:fingerprints=%s' % hgmo_fingerprint,
         ])
 
     if base_repo:
-        args.extend([b'--upstream', base_repo])
+        args.extend(['--upstream', base_repo])
     if sparse_profile:
-        args.extend([b'--sparseprofile', sparse_profile])
+        args.extend(['--sparseprofile', sparse_profile])
 
     args.extend([
         revision_flag, revision_value,
         source_repo, dest,
     ])
 
     res = run_and_prefix_output(b'vcs', args,
                                 extra_env={b'PYTHONUNBUFFERED': b'1'})
     if res:
         sys.exit(res)
 
     # Update the current revision hash and ensure that it is well formed.
     revision = subprocess.check_output(
-        [hg_bin, b'log',
-         b'--rev', b'.',
-         b'--template', b'{node}'],
-        cwd=dest)
+        [hg_bin, 'log',
+         '--rev', '.',
+         '--template', '{node}'],
+        cwd=dest,
+        # Triggers text mode on Python 3.
+        universal_newlines=True)
 
     assert re.match('^[a-f0-9]{40}$', revision)
 
-    repo_name = source_repo.split('/')[-1]
-    print_line(b'vcs', b"TinderboxPrint:<a href={source_repo}/rev/{revision} "
-                       b"title='Built from {repo_name} revision {revision}'>"
-                       b"{revision}</a>\n".format(
-                           revision=revision,
-                           source_repo=source_repo,
-                           repo_name=repo_name,
-                       ))
+    msg = ("TinderboxPrint:<a href={source_repo}/rev/{revision} "
+           "title='Built from {repo_name} revision {revision}'>"
+           "{revision}</a>\n".format(revision=revision,
+                                     source_repo=source_repo,
+                                     repo_name=source_repo.split('/')[-1]))
+
+    print_line(b'vcs', msg.encode('utf-8'))
 
     return revision
 
 
 def main(args):
     print_line(b'setup', b'run-task started\n')
     running_as_root = IS_POSIX and os.getuid() == 0
 
@@ -577,17 +581,17 @@ def main(args):
     if volumes and not IS_POSIX:
         print('assertion failed: volumes not expected on Windows')
         return 1
 
     # Sanitize volumes.
     for volume in volumes:
         # If a volume is a cache, it was dealt with above.
         if volume in caches:
-            print_line(b'volume', b'volume %s is a cache\n' % volume)
+            print_line(b'volume', b'volume %s is a cache\n' % bytestr(volume))
             continue
 
         configure_volume_posix(volume, user, group, running_as_root)
 
     all_caches_and_volumes = set(map(os.path.normpath, caches))
     all_caches_and_volumes |= set(map(os.path.normpath, volumes))
 
     def path_in_cache_or_volume(path):
@@ -615,17 +619,17 @@ def main(args):
                   checkout)
             sys.exit(1)
 
         # TODO given the performance implications, consider making this a fatal
         # error.
         if not path_in_cache_or_volume(checkout):
             print_line(b'vcs', b'WARNING: vcs checkout path (%s) not in cache '
                                b'or volume; performance will likely suffer\n' %
-                               checkout)
+                               bytestr(checkout))
 
         # Ensure the directory for the source checkout exists.
         try:
             os.makedirs(os.path.dirname(checkout))
         except OSError as e:
             if e.errno != errno.EEXIST:
                 raise
 
@@ -639,17 +643,17 @@ def main(args):
             print('error: HG_STORE_PATH environment variable not set')
             sys.exit(1)
 
         store_path = os.environ['HG_STORE_PATH']
 
         if not path_in_cache_or_volume(store_path):
             print_line(b'vcs', b'WARNING: HG_STORE_PATH (%s) not in cache or '
                                b'volume; performance will likely suffer\n' %
-                               store_path)
+                               bytestr(store_path))
 
         try:
             os.makedirs(store_path)
         except OSError as e:
             if e.errno != errno.EEXIST:
                 raise
 
         if running_as_root:
@@ -677,39 +681,39 @@ def main(args):
     # disallow running tasks with symbolic revisions unless they have been
     # resolved by a checkout.
     if args.vcs_checkout:
         base_repo = os.environ.get('GECKO_BASE_REPOSITORY')
         # Some callers set the base repository to mozilla-central for historical
         # reasons. Switch to mozilla-unified because robustcheckout works best
         # with it.
         if base_repo == 'https://hg.mozilla.org/mozilla-central':
-            base_repo = b'https://hg.mozilla.org/mozilla-unified'
+            base_repo = 'https://hg.mozilla.org/mozilla-unified'
 
         os.environ['GECKO_HEAD_REV'] = vcs_checkout(
             os.environ['GECKO_HEAD_REPOSITORY'],
             args.vcs_checkout,
             os.environ['HG_STORE_PATH'],
             base_repo=base_repo,
             revision=os.environ.get('GECKO_HEAD_REV'),
             branch=os.environ.get('GECKO_HEAD_REF'),
             sparse_profile=args.sparse_profile)
 
     elif not os.environ.get('GECKO_HEAD_REV') and \
             os.environ.get('GECKO_HEAD_REF'):
         print('task should be defined in terms of non-symbolic revision')
         return 1
 
     if args.tools_checkout:
-        vcs_checkout(b'https://hg.mozilla.org/build/tools',
+        vcs_checkout('https://hg.mozilla.org/build/tools',
                      args.tools_checkout,
                      os.environ['HG_STORE_PATH'],
                      # Always check out the latest commit on default branch.
                      # This is non-deterministic!
-                     branch=b'default')
+                     branch='default')
 
     # Checkout the repository, setting the COMM_HEAD_REV to the current
     # revision hash. Revision hashes have priority over symbolic revisions. We
     # disallow running tasks with symbolic revisions unless they have been
     # resolved by a checkout.
     if args.comm_checkout:
         base_repo = os.environ.get('COMM_BASE_REPOSITORY')