Bug 1459737 - Make run-task more runnable on non-POSIX platforms; r?dustin draft
author Gregory Szorc <gps@mozilla.com>
Fri, 04 May 2018 17:11:53 -0700
changeset 792199 2ba50a1980ffea144e48ce9ff667c126dcb23759
parent 792198 57d70ebb48138f303c0ca48481334d756f7db20e
child 792200 64d58a56a40db839e367744101aca884341ca0a2
push id 109042
push user bmo:gps@mozilla.com
push date Mon, 07 May 2018 21:25:20 +0000
reviewers dustin
bugs 1459737
milestone 62.0a1
Bug 1459737 - Make run-task more runnable on non-POSIX platforms; r?dustin

I want to make run-task work on Windows. The script is currently very
POSIX-oriented, as it assumes the existence of the grp and pwd modules,
that user IDs are numeric, and that system calls like setresgid() and
setresuid() are available, etc.

This commit starts to make some of the POSIX-centric code conditional
on running on POSIX.

Code for uid/gid extraction has been moved to its own function. Some
error messages were tweaked slightly as part of the move. Otherwise,
the changes should be pretty straightforward.

There are still other parts of this file that won't work on e.g.
Windows. But this gets us a big step closer.

MozReview-Commit-ID: HNyytKcBbBo
taskcluster/scripts/run-task
--- a/taskcluster/scripts/run-task
+++ b/taskcluster/scripts/run-task
@@ -13,20 +13,18 @@ the requested process and prints its out
 current time to improve log usefulness.
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import argparse
 import datetime
 import errno
-import grp
 import json
 import os
-import pwd
 import re
 import socket
 import stat
 import subprocess
 import sys
 import urllib2
 
 
@@ -68,16 +66,19 @@ where it fails to use new volumes for ta
 
 # The exit code to use when caches should be purged and the task retried.
 # This is EX_OSFILE (from sysexits.h):
 #     Some system file  does not exist, cannot be opened, or has some
 #     sort of error (e.g., syntax error).
 EXIT_PURGE_CACHE = 72
 
 
+IS_POSIX = os.name == 'posix'
+
+
 def print_line(prefix, m):
     now = datetime.datetime.utcnow().isoformat()
     now = now[:-3] if now[-7] == '.' else now  # slice microseconds to 3 decimals
     print(b'[%s %sZ] %s' % (prefix, now, m), end=b'')
 
 
 def run_and_prefix_output(prefix, args, extra_env=None):
     """Runs a process and prefixes its output with the time.
@@ -112,16 +113,50 @@ def run_and_prefix_output(prefix, args, 
         if data == b'':
             break
 
         print_line(prefix, data)
 
     return p.wait()
 
 
+def get_posix_user_group(user, group):
+    """Resolve `user`/`group` to (passwd entry, group entry, gids the user belongs to).
+
+    Exits 1 if a name is unknown or `worker` is not ID 1000 (keeps cache ownership consistent).
+    """
+    import grp
+    import pwd
+
+    try:
+        user_record = pwd.getpwnam(user)
+    except KeyError:
+        print('could not find user %s; specify a valid user with --user' % user)
+        sys.exit(1)
+
+    try:
+        group_record = grp.getgrnam(group)
+    except KeyError:
+        print('could not find group %s; specify a valid group with --group' % group)
+        sys.exit(1)
+
+    if user_record.pw_name == 'worker' and user_record.pw_uid != 1000:
+        print('user `worker` must have uid=1000; got %d' % user_record.pw_uid)
+        sys.exit(1)
+
+    if group_record.gr_name == 'worker' and group_record.gr_gid != 1000:
+        print('group `worker` must have gid=1000; got %d' % group_record.gr_gid)
+        sys.exit(1)
+
+    # Groups of which this user is a member: gr_mem holds member user names.
+    gids = [g.gr_gid for g in grp.getgrall() if user in g.gr_mem]
+
+    return user_record, group_record, gids
+
+
 WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR
 
 
 def set_dir_permissions(path, uid, gid):
     st = os.lstat(path)
 
     if st.st_uid != uid or st.st_gid != gid:
         os.chown(path, uid, gid)
@@ -233,17 +268,17 @@ def vcs_checkout(source_repo, dest, stor
                            repo_name=repo_name,
                        ))
 
     return revision
 
 
 def main(args):
     print_line(b'setup', b'run-task started\n')
-    running_as_root = os.getuid() == 0
+    running_as_root = IS_POSIX and os.getuid() == 0
 
     # Arguments up to '--' are ours. After are for the main task
     # to be executed.
     try:
         i = args.index('--')
         our_args = args[0:i]
         task_args = args[i + 1:]
     except ValueError:
@@ -270,40 +305,18 @@ def main(args):
     # expand ~ in some paths
     if args.vcs_checkout:
         args.vcs_checkout = os.path.expanduser(args.vcs_checkout)
     if args.tools_checkout:
         args.tools_checkout = os.path.expanduser(args.tools_checkout)
     if 'HG_STORE_PATH' in os.environ:
         os.environ['HG_STORE_PATH'] = os.path.expanduser(os.environ['HG_STORE_PATH'])
 
-    if running_as_root:
-        try:
-            user = pwd.getpwnam(args.user)
-        except KeyError:
-            print('could not find user %s; specify --user to a known user' %
-                  args.user)
-            return 1
-        try:
-            group = grp.getgrnam(args.group)
-        except KeyError:
-            print('could not find group %s; specify --group to a known group' %
-                  args.group)
-            return 1
-
-        if user.pw_name == 'worker' and user.pw_uid != 1000:
-            print('user `worker` must have uid=1000.')
-            return 1
-        if group.gr_name == 'worker' and group.gr_gid != 1000:
-            print('group `worker` must have gid=1000.')
-            return 1
-
-        # Find all groups to which this user is a member.
-        gids = [g.gr_gid for g in grp.getgrall() if args.group in g.gr_mem]
-
+    if IS_POSIX and running_as_root:
+        user, group, gids = get_posix_user_group(args.user, args.group)
         uid = user.pw_uid
         gid = group.gr_gid
     else:
         uid = gid = gids = None
 
     # Validate caches.
     #
     # Taskgraph should pass in a list of paths that are caches via an
@@ -337,22 +350,25 @@ def main(args):
 
     our_requirements = {
         # Include a version string that we can bump whenever to trigger
         # fresh caches. The actual value is not relevant and doesn't need
         # to follow any explicit order. Since taskgraph bakes this file's
         # hash into cache names, any change to this file/version is sufficient
         # to force the use of a new cache.
         b'version=1',
-        # Include the UID and GID the task will run as to ensure that tasks
-        # with different UID and GID don't share the same cache.
-        b'uid=%d' % uid,
-        b'gid=%d' % gid,
     }
 
+    # Include the UID and GID the task will run as to ensure that tasks
+    # with different UID and GID don't share the same cache.
+    if uid is not None:
+        our_requirements.add(b'uid=%d' % uid)
+    if gid is not None:
+        our_requirements.add(b'gid=%d' % gid)
+
     def write_audit_entry(path, msg):
         now = datetime.datetime.utcnow().isoformat()
         with open(path, 'ab') as fh:
             fh.write(b'[%sZ %s] %s\n' % (
                      now, os.environ.get('TASK_ID', 'UNKNOWN'), msg))
 
     for cache in caches:
         if not os.path.isdir(cache):
@@ -559,17 +575,17 @@ def main(args):
         if running_as_root:
             os.chown(store_path, uid, gid)
 
     prepare_checkout_dir(args.vcs_checkout)
     prepare_checkout_dir(args.tools_checkout)
     if args.vcs_checkout or args.tools_checkout or args.comm_checkout:
         prepare_hg_store_path()
 
-    if running_as_root:
+    if IS_POSIX and running_as_root:
         # Drop permissions to requested user.
         # This code is modeled after what `sudo` was observed to do in a Docker
         # container. We do not bother calling setrlimit() because containers have
         # their own limits.
         print_line(b'setup', b'running as %s:%s\n' % (args.user, args.group))
         os.setgroups(gids)
         os.umask(022)
         os.setresgid(gid, gid, gid)