Bug 1460470 - Make run-task somewhat usable on Python 3; r=mshal
☠☠ backed out by 278ac3ea0ce5 ☠ ☠
author Gregory Szorc <gps@mozilla.com>
Wed, 16 May 2018 11:06:36 -0700
changeset 796060 19fe5702cf6d018b743108b35e86d1750f205a76
parent 796059 ef477a048b575958be74287a2273830813b385f1
child 796061 4902cab3ce5dab2d1756cf0cd5c95f40603c0a0e
push id 110148
push user bmo:gps@mozilla.com
push date Wed, 16 May 2018 22:07:12 +0000
reviewers mshal
bugs 1460470
milestone 62.0a1
Bug 1460470 - Make run-task somewhat usable on Python 3; r=mshal

This required a lot of attention to bytes versus strings. The hacks around handling process output are somewhat gross. Apparently readline() doesn't work on bytes streams in Python 3?! So we install a custom stream decoder so we can have nice things.

There are still some failures in run-task on Python 3. But we're a big step closer.

MozReview-Commit-ID: 4FJlTn3q9Ai
taskcluster/scripts/run-task
--- a/taskcluster/scripts/run-task
+++ b/taskcluster/scripts/run-task
@@ -13,16 +13,17 @@ the requested process and prints its out
 current time to improve log usefulness.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import argparse
 import datetime
 import errno
+import io
 import json
 import os
 import re
 import socket
 import stat
 import subprocess
 import sys
 
@@ -34,16 +35,18 @@ try:
     URLError = urllib.error.URLError
 except ImportError:
     import urllib2
 
     urlopen = urllib2.urlopen
     URLError = urllib2.URLError
 
 
+PY3 = sys.version_info.major == 3
+
 FINGERPRINT_URL = 'http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/hgfingerprint'
 FALLBACK_FINGERPRINT = {
     'fingerprints':
         "sha256:8e:ad:f7:6a:eb:44:06:15:ed:f3:e4:69:a6:64:60:37:2d:ff:98:88:37"
         ":bf:d7:b8:40:84:01:48:9c:26:ce:d9"}
 
 
 CACHE_UID_GID_MISMATCH = '''
@@ -81,52 +84,74 @@ where it fails to use new volumes for ta
 #     sort of error (e.g., syntax error).
 EXIT_PURGE_CACHE = 72
 
 
 IS_POSIX = os.name == 'posix'
 IS_WINDOWS = os.name == 'nt'
 
 
+if PY3:
+    bytestr = lambda x: x.encode('utf-8', 'strict')
+else:
+    bytestr = bytes
+
+
 def print_line(prefix, m):
-    now = datetime.datetime.utcnow().isoformat()
-    now = now[:-3] if now[-7] == '.' else now  # slice microseconds to 3 decimals
-    print(b'[%s %sZ] %s' % (prefix, now, m), end=b'')
+    now = bytestr(datetime.datetime.utcnow().isoformat())
+    # slice microseconds to 3 decimals.
+    now = now[:-3] if now[-7:-6] == b'.' else now
+    sys.stdout.buffer.write(b'[%s %sZ] %s' % (prefix, now, m))
+    sys.stdout.buffer.flush()
 
 
 def run_and_prefix_output(prefix, args, extra_env=None):
     """Runs a process and prefixes its output with the time.
 
     Returns the process exit code.
     """
-    print_line(prefix, b'executing %s\n' % args)
+    print_line(prefix, b'executing %r\n' % args)
 
     env = dict(os.environ)
     env.update(extra_env or {})
 
     # Note: TaskCluster's stdin is a TTY. This attribute is lost
     # when we pass sys.stdin to the invoked process. If we cared
     # to preserve stdin as a TTY, we could make this work. But until
     # someone needs it, don't bother.
+
+    # We want stdout to be bytes on Python 3. That means we can't use
+    # universal_newlines=True (because it implies text mode). But
+    # p.stdout.readline() won't work for bytes text streams. So, on Python 3,
+    # we manually install a latin1 stream wrapper. This allows us to readline()
+    # and preserves bytes, without losing any data.
+
     p = subprocess.Popen(args,
                          # Disable buffering because we want to receive output
                          # as it is generated so timestamps in logs are
                          # accurate.
                          bufsize=0,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          stdin=sys.stdin.fileno(),
                          cwd='/',
                          env=env,
-                         # So \r in progress bars are rendered as multiple
-                         # lines, preserving progress indicators.
-                         universal_newlines=True)
+                         universal_newlines=not PY3)
+
+    if PY3:
+        stdout = io.TextIOWrapper(p.stdout, encoding='latin1')
+    else:
+        stdout = p.stdout
 
     while True:
-        data = p.stdout.readline()
+        data = stdout.readline()
+
+        if PY3:
+            data = data.encode('latin1')
+
         if data == b'':
             break
 
         print_line(prefix, data)
 
     return p.wait()
 
 
@@ -635,17 +660,18 @@ def main(args):
     if args.vcs_checkout or args.tools_checkout or args.comm_checkout:
         prepare_hg_store_path()
 
     if IS_POSIX and running_as_root:
         # Drop permissions to requested user.
         # This code is modeled after what `sudo` was observed to do in a Docker
         # container. We do not bother calling setrlimit() because containers have
         # their own limits.
-        print_line(b'setup', b'running as %s:%s\n' % (args.user, args.group))
+        print_line(b'setup', b'running as %s:%s\n' % (bytestr(args.user),
+                                                      bytestr(args.group)))
         os.setgroups(gids)
         os.umask(0o22)
         os.setresgid(gid, gid, gid)
         os.setresuid(uid, uid, uid)
 
     # Checkout the repository, setting the GECKO_HEAD_REV to the current
     # revision hash. Revision hashes have priority over symbolic revisions. We
     # disallow running tasks with symbolic revisions unless they have been
@@ -700,12 +726,13 @@ def main(args):
         print('task should be defined in terms of non-symbolic revision')
         return 1
 
     return run_and_prefix_output(b'task', task_args)
 
 
 if __name__ == '__main__':
     # Unbuffer stdio.
-    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
-    sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', 0)
+    if not PY3:
+        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
+        sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', 0)
 
     sys.exit(main(sys.argv[1:]))