Bug 1460470 - Make run-task Python 3.5+ only; r=mshal
☠☠ backed out by 278ac3ea0ce5 ☠ ☠
authorGregory Szorc <gps@mozilla.com>
Fri, 11 May 2018 10:19:53 -0700
changeset 796063 94a9641c5a018cfe729ebe748e75a7c4373e4322
parent 796062 33fe8423f88cb8158559bc736d89b4849b57d315
child 796247 7a2224a146ec8b9f6a6b982be3b241e4b127bf5d
push id110148
push userbmo:gps@mozilla.com
push dateWed, 16 May 2018 22:07:12 +0000
reviewersmshal
bugs1460470
milestone62.0a1
Bug 1460470 - Make run-task Python 3.5+ only; r=mshal A try push converting run-task to Python 3 seemed to complete without error. Since it is annoying writing code that needs to work on both Python 2 and 3, let's require Python 3 and remove code for supporting Python 2. We implement a version check enforcing Python 3.5+. This is because we're supposed to be standardizing on 3.5+ everywhere. I want to prevent accidental usage of older Python 3 versions. MozReview-Commit-ID: 4vATLZ6Si2e
taskcluster/scripts/run-task
--- a/taskcluster/scripts/run-task
+++ b/taskcluster/scripts/run-task
@@ -8,44 +8,38 @@
 This script is meant to be the "driver" for TaskCluster based tasks.
 It receives some common arguments to control the run-time environment.
 
 It performs actions as requested from the arguments. Then it executes
 the requested process and prints its output, prefixing it with the
 current time to improve log usefulness.
 """
 
-from __future__ import absolute_import, print_function, unicode_literals
+import sys
+
+
+if sys.version_info[0:2] < (3, 5):
+    print('run-task requires Python 3.5+')
+    sys.exit(1)
+
 
 import argparse
 import datetime
 import errno
 import io
 import json
 import os
 import re
 import socket
 import stat
 import subprocess
-import sys
-
-try:
-    import urllib.error
-    import urllib.request
 
-    urlopen = urllib.request.urlopen
-    URLError = urllib.error.URLError
-except ImportError:
-    import urllib2
+import urllib.error
+import urllib.request
 
-    urlopen = urllib2.urlopen
-    URLError = urllib2.URLError
-
-
-PY3 = sys.version_info.major == 3
 
 FINGERPRINT_URL = 'http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/hgfingerprint'
 FALLBACK_FINGERPRINT = {
     'fingerprints':
         "sha256:8e:ad:f7:6a:eb:44:06:15:ed:f3:e4:69:a6:64:60:37:2d:ff:98:88:37"
         ":bf:d7:b8:40:84:01:48:9c:26:ce:d9"}
 
 
@@ -84,26 +78,18 @@ where it fails to use new volumes for ta
 #     sort of error (e.g., syntax error).
 EXIT_PURGE_CACHE = 72
 
 
 IS_POSIX = os.name == 'posix'
 IS_WINDOWS = os.name == 'nt'
 
 
-if PY3:
-    bytestr = lambda x: x.encode('utf-8', 'strict')
-    sysstr = lambda x: x.decode('utf-8', 'strict')
-else:
-    bytestr = bytes
-    sysstr = str
-
-
 def print_line(prefix, m):
-    now = bytestr(datetime.datetime.utcnow().isoformat())
+    now = datetime.datetime.utcnow().isoformat().encode('utf-8')
     # slice microseconds to 3 decimals.
     now = now[:-3] if now[-7:-6] == b'.' else now
     sys.stdout.buffer.write(b'[%s %sZ] %s' % (prefix, now, m))
     sys.stdout.buffer.flush()
 
 
 def run_and_prefix_output(prefix, args, extra_env=None):
     """Runs a process and prefixes its output with the time.
@@ -130,29 +116,22 @@ def run_and_prefix_output(prefix, args, 
                          # Disable buffering because we want to receive output
                          # as it is generated so timestamps in logs are
                          # accurate.
                          bufsize=0,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          stdin=sys.stdin.fileno(),
                          cwd='/',
-                         env=env,
-                         universal_newlines=not PY3)
+                         env=env)
 
-    if PY3:
-        stdout = io.TextIOWrapper(p.stdout, encoding='latin1')
-    else:
-        stdout = p.stdout
+    stdout = io.TextIOWrapper(p.stdout, encoding='latin1')
 
     while True:
-        data = stdout.readline()
-
-        if PY3:
-            data = data.encode('latin1')
+        data = stdout.readline().encode('latin1')
 
         if data == b'':
             break
 
         print_line(prefix, data)
 
     return p.wait()
 
@@ -187,20 +166,20 @@ def get_posix_user_group(user, group):
 
     # Find all groups to which this user is a member.
     gids = [g.gr_gid for g in grp.getgrall() if group in g.gr_mem]
 
     return user_record, group_record, gids
 
 
 def write_audit_entry(path, msg):
-    now = bytestr(datetime.datetime.utcnow().isoformat())
+    now = datetime.datetime.utcnow().isoformat().encode('utf-8')
     with open(path, 'ab') as fh:
         fh.write(b'[%sZ %s] %s\n' % (
-                 now, bytestr(os.environ.get('TASK_ID', 'UNKNOWN')), msg))
+                 now, os.environb.get(b'TASK_ID', b'UNKNOWN'), msg))
 
 
 WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR
 
 
 def set_dir_permissions(path, uid, gid):
     st = os.lstat(path)
 
@@ -211,17 +190,18 @@ def set_dir_permissions(path, uid, gid):
     # them.
     if st.st_mode & WANTED_DIR_MODE != WANTED_DIR_MODE:
         os.chmod(path, st.st_mode | WANTED_DIR_MODE)
 
 
 def chown_recursive(path, user, group, uid, gid):
     print_line(b'chown',
                b'recursively changing ownership of %s to %s:%s\n' %
-               (bytestr(path), bytestr(user), bytestr(group)))
+               (path.encode('utf-8'), user.encode('utf-8'), group.encode(
+                   'utf-8')))
 
     set_dir_permissions(path, uid, gid)
 
     for root, dirs, files in os.walk(path):
         for d in dirs:
             set_dir_permissions(os.path.join(root, d), uid, gid)
 
         for f in files:
@@ -268,17 +248,17 @@ def configure_cache_posix(cache, user, g
 
     requires_path = os.path.join(cache, '.cacherequires')
     audit_path = os.path.join(cache, '.cachelog')
 
     # The cache is empty. Configure it.
     if not os.listdir(cache):
         print_line(b'cache', b'cache %s is empty; writing requirements: '
                              b'%s\n' % (
-                   bytestr(cache), b' '.join(sorted(our_requirements))))
+                    cache.encode('utf-8'), b' '.join(sorted(our_requirements))))
 
         # We write a requirements file so future invocations know what the
         # requirements are.
         with open(requires_path, 'wb') as fh:
             fh.write(b'\n'.join(sorted(our_requirements)))
 
         # And make it read-only as a precaution against deletion.
         os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
@@ -292,51 +272,51 @@ def configure_cache_posix(cache, user, g
 
     # The cache has content and we have a requirements file. Validate
     # requirements alignment.
     if os.path.exists(requires_path):
         with open(requires_path, 'rb') as fh:
             wanted_requirements = set(fh.read().splitlines())
 
         print_line(b'cache', b'cache %s exists; requirements: %s\n' % (
-            bytestr(cache), b' '.join(sorted(wanted_requirements))))
+            cache.encode('utf-8'), b' '.join(sorted(wanted_requirements))))
 
         missing = wanted_requirements - our_requirements
 
         # Allow requirements mismatch for uid/gid if and only if caches
         # are untrusted. This allows cache behavior on Try to be
         # reasonable. Otherwise, random tasks could "poison" cache
         # usability by introducing uid/gid mismatches. For untrusted
         # environments like Try, this is a perfectly reasonable thing to
         # allow.
         if missing and untrusted_caches and running_as_root and \
                 all(s.startswith((b'uid=', b'gid=')) for s in missing):
             print_line(b'cache',
                        b'cache %s uid/gid mismatch; this is acceptable '
                        b'because caches for this task are untrusted; '
                        b'changing ownership to facilitate cache use\n' %
-                       bytestr(cache))
+                       cache.encode('utf-8'))
             chown_recursive(cache, user.pw_name, group.gr_name, user.pw_uid,
                             group.gr_gid)
 
             # And write out the updated reality.
             with open(requires_path, 'wb') as fh:
                 fh.write(b'\n'.join(sorted(our_requirements)))
 
             write_audit_entry(audit_path,
                               b'chown; requirements: %s' %
                               b', '.join(sorted(our_requirements)))
 
         elif missing:
             print('error: requirements for populated cache %s differ from '
                   'this task' % cache)
             print('cache requirements: %s' % ' '.join(sorted(
-                map(sysstr, wanted_requirements))))
+                s.decode('utf-8') for s in wanted_requirements)))
             print('our requirements:   %s' % ' '.join(sorted(
-                map(sysstr, our_requirements))))
+                s.decode('utf-8') for s in our_requirements)))
             if any(s.startswith((b'uid=', b'gid=')) for s in missing):
                 print(CACHE_UID_GID_MISMATCH)
 
             write_audit_entry(audit_path,
                               b'requirements mismatch; wanted: %s' %
                               b', '.join(sorted(our_requirements)))
 
             print('')
@@ -382,17 +362,17 @@ def configure_volume_posix(volume, user,
               ' '.join(sorted(volume_files)))
         sys.exit(1)
 
     # The volume is almost certainly owned by root:root. Chown it so it
     # is writable.
 
     if running_as_root:
         print_line(b'volume', b'changing ownership of volume %s '
-                              b'to %d:%d\n' % (bytestr(volume),
+                              b'to %d:%d\n' % (volume.encode('utf-8'),
                                                user.pw_uid,
                                                group.gr_gid))
         set_dir_permissions(volume, user.pw_uid, group.gr_gid)
 
 
 def vcs_checkout(source_repo, dest, store_path,
                  base_repo=None, revision=None, branch=None,
                  fetch_hgfingerprint=False, sparse_profile=None):
@@ -425,25 +405,25 @@ def vcs_checkout(source_repo, dest, stor
         '--purge',
     ]
 
     # Obtain certificate fingerprints.  Without this, the checkout will use the fingerprint
     # on the system, which is managed some other way (such as puppet)
     if fetch_hgfingerprint:
         try:
             print_line(b'vcs', b'fetching hg.mozilla.org fingerprint from %s\n' %
-                       bytestr(FINGERPRINT_URL))
-            res = urlopen(FINGERPRINT_URL, timeout=10)
+                       FINGERPRINT_URL.encode('utf-8'))
+            res = urllib.request.urlopen(FINGERPRINT_URL, timeout=10)
             secret = res.read()
             try:
                 secret = json.loads(secret, encoding='utf-8')
             except ValueError:
                 print_line(b'vcs', b'invalid JSON in hg fingerprint secret')
                 sys.exit(1)
-        except (URLError, socket.timeout):
+        except (urllib.error.URLError, socket.timeout):
             print_line(b'vcs', b'Unable to retrieve current hg.mozilla.org fingerprint'
                                b'using the secret service, using fallback instead.')
             # XXX This fingerprint will not be accurate if running on an old
             #     revision after the server fingerprint has changed.
             secret = {'secret': FALLBACK_FINGERPRINT}
 
         hgmo_fingerprint = secret['secret']['fingerprints']
         args.extend([
@@ -581,17 +561,18 @@ def main(args):
     if volumes and not IS_POSIX:
         print('assertion failed: volumes not expected on Windows')
         return 1
 
     # Sanitize volumes.
     for volume in volumes:
         # If a volume is a cache, it was dealt with above.
         if volume in caches:
-            print_line(b'volume', b'volume %s is a cache\n' % bytestr(volume))
+            print_line(b'volume', b'volume %s is a cache\n' %
+                       volume.encode('utf-8'))
             continue
 
         configure_volume_posix(volume, user, group, running_as_root)
 
     all_caches_and_volumes = set(map(os.path.normpath, caches))
     all_caches_and_volumes |= set(map(os.path.normpath, volumes))
 
     def path_in_cache_or_volume(path):
@@ -619,17 +600,17 @@ def main(args):
                   checkout)
             sys.exit(1)
 
         # TODO given the performance implications, consider making this a fatal
         # error.
         if not path_in_cache_or_volume(checkout):
             print_line(b'vcs', b'WARNING: vcs checkout path (%s) not in cache '
                                b'or volume; performance will likely suffer\n' %
-                               bytestr(checkout))
+                               checkout.encode('utf-8'))
 
         # Ensure the directory for the source checkout exists.
         try:
             os.makedirs(os.path.dirname(checkout))
         except OSError as e:
             if e.errno != errno.EEXIST:
                 raise
 
@@ -643,17 +624,17 @@ def main(args):
             print('error: HG_STORE_PATH environment variable not set')
             sys.exit(1)
 
         store_path = os.environ['HG_STORE_PATH']
 
         if not path_in_cache_or_volume(store_path):
             print_line(b'vcs', b'WARNING: HG_STORE_PATH (%s) not in cache or '
                                b'volume; performance will likely suffer\n' %
-                               bytestr(store_path))
+                               store_path.encode('utf-8'))
 
         try:
             os.makedirs(store_path)
         except OSError as e:
             if e.errno != errno.EEXIST:
                 raise
 
         if running_as_root:
@@ -664,18 +645,19 @@ def main(args):
     if args.vcs_checkout or args.tools_checkout or args.comm_checkout:
         prepare_hg_store_path()
 
     if IS_POSIX and running_as_root:
         # Drop permissions to requested user.
         # This code is modeled after what `sudo` was observed to do in a Docker
         # container. We do not bother calling setrlimit() because containers have
         # their own limits.
-        print_line(b'setup', b'running as %s:%s\n' % (bytestr(args.user),
-                                                      bytestr(args.group)))
+        print_line(b'setup', b'running as %s:%s\n' % (
+            args.user.encode('utf-8'), args.group.encode('utf-8')))
+
         os.setgroups(gids)
         os.umask(0o22)
         os.setresgid(gid, gid, gid)
         os.setresuid(uid, uid, uid)
 
     # Checkout the repository, setting the GECKO_HEAD_REV to the current
     # revision hash. Revision hashes have priority over symbolic revisions. We
     # disallow running tasks with symbolic revisions unless they have been
@@ -729,14 +711,9 @@ def main(args):
             os.environ.get('COMM_HEAD_REF'):
         print('task should be defined in terms of non-symbolic revision')
         return 1
 
     return run_and_prefix_output(b'task', task_args)
 
 
 if __name__ == '__main__':
-    # Unbuffer stdio.
-    if not PY3:
-        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
-        sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', 0)
-
     sys.exit(main(sys.argv[1:]))