INCOMPLETE Bug 1396154 - Build Python from source and use it draft
author Gregory Szorc <gps@mozilla.com>
Fri, 01 Sep 2017 23:23:24 -0700
changeset 657857 71ff8f0c5f476aa764894965baabc770175bd70e
parent 657856 90279ec8a5e628214eec6fa060c969e6bfab6be2
child 729544 b94de05965378734ee44ed7ec23625a2723d67ae
push id 77647
push user gszorc@mozilla.com
push date Sat, 02 Sep 2017 06:34:03 +0000
bugs 1396154
milestone 57.0a1
INCOMPLETE Bug 1396154 - Build Python from source and use it

Installing Python through the system package manager pulls in a lot of things we may not want. This commit establishes a recipe for building Python 2.7 and 3.6 from source and installing them into a non-system location (/mozilla). We hook up this recipe to the lint image. This required some new functionality to allow recipes to communicate which packages they are installing, including packages that are only needed temporarily. This allows our image builder to remove "temporary" packages before exiting, which can significantly reduce image size by removing things like compiler toolchains. The previous Python 3 package install has been removed from the lint image. (It was recently added and isn't widely used.) /mozilla/bin is added to PATH via /etc/profile.d. However, this may not take effect in non-interactive shells, so run-task will update PATH if /mozilla/bin exists as a failsafe. Even with the double install of Python 2.7, this reduces the Docker image size from ~628MB to ~598MB.

MozReview-Commit-ID: AQUaDCfbb90
taskcluster/docker/bootstrap/build-image-main
taskcluster/docker/bootstrap/shell-helper.sh
taskcluster/docker/lint/image.yml
taskcluster/docker/recipes/install-python.sh
taskcluster/docker/recipes/python.tt
taskcluster/docker/recipes/run-task
--- a/taskcluster/docker/bootstrap/build-image-main
+++ b/taskcluster/docker/bootstrap/build-image-main
@@ -97,16 +97,28 @@ def install_packages(packages):
     if have_apt():
         for package in packages:
             print('installing %s' % package)
             subprocess.check_call(['apt-get', 'install', '-y', package])
     else:
         raise Exception('do not know how to install packages')
 
 
+def remove_packages(packages):
+    """Uninstall ``packages`` via apt and autoremove orphaned dependencies."""
+    if not packages:
+        return
+    if not have_apt():
+        raise Exception('do not know how to remove packages')
+    names = sorted(packages)
+    print('uninstalling %s' % ' '.join(names))
+    subprocess.check_call(['apt-get', 'remove', '-y'] + names)
+    subprocess.check_call(['apt-get', 'autoremove', '-y'])
+
+
 def get_recipes(path):
     if not os.path.exists(path):
         return []
 
     recipes = []
 
     for recipe in sorted(os.listdir(path)):
         p = os.path.join(path, recipe)
@@ -131,16 +143,20 @@ def main():
     recipe_dir = os.path.join(here, 'recipes')
     recipes = get_recipes(recipe_dir)
 
     with open(os.path.join(here, 'downloads.json'), 'rb') as fh:
         downloads = json.load(fh)
 
     # At this point, input validation is complete. Now do some work.
 
+    # Set up /mozilla/bin on PATH.
+    with open('/etc/profile.d/mozilla.sh', 'wb') as fh:
+        fh.write(b'export PATH=/mozilla/bin:$PATH\n')
+
     # Copy run-task.
     os.makedirs('/builds/worker/bin', exist_ok=True)
     shutil.copyfile(os.path.join(here, 'run-task'),
                     '/builds/worker/bin/run-task')
 
     # Download requested files using the tooltool server. We don't use tooltool
     # itself because it isn't necessary.
     download_dir = os.path.join(here, 'downloads')
@@ -156,30 +172,55 @@ def main():
         # Force apt to be non-interactive.
         os.environb[b'DEBIAN_FRONTEND'] = b'noninteractive'
 
     with open(os.path.join(here, 'system-packages.json'), 'rb') as fh:
         packages = json.load(fh)
 
     install_packages(packages)
 
+    recipe_states = []
+
     # Now run each recipe in isolation.
 
     # TODO "sandbox" process to limit network accesses to well-defined
     # and allowed endpoints.
     with open(os.devnull, 'wb') as devnull:
         for recipe in recipes:
             print('executing recipe %s' % recipe)
+            state = {}
             full = os.path.join(recipe_dir, recipe)
             try:
                 subprocess.check_call(full, cwd=here, stdin=devnull)
             except subprocess.CalledProcessError as e:
                 print('ERROR: recipe %s exited %d' % (recipe, e.returncode))
                 return 1
 
+            for s in ('persisted', 'temporary'):
+                path = Path(here, 'system-packages.%s' % s)
+                try:
+                    with open(path, 'r') as fh:
+                        packages = set(p.strip() for p in fh if p.strip())
+
+                    state['system-packages-%s' % s] = packages
+                    os.unlink(path)
+                except FileNotFoundError:
+                    pass
+
+            recipe_states.append(state)
+
+    all_persisted_packages = set()
+    all_temporary_packages = set()
+    for state in recipe_states:
+        all_persisted_packages |= state.get('system-packages-persisted', set())
+        all_temporary_packages |= state.get('system-packages-temporary', set())
+
+    # Prune temporary packages.
+    remove_packages(all_temporary_packages - all_persisted_packages)
+
     # Normalize some permissions.
     u_worker = pwd.getpwnam('worker')
     g_worker = grp.getgrnam('worker')
     uid = u_worker.pw_uid
     gid = g_worker.gr_gid
 
     for root, dirs, files in os.walk('/builds/worker'):
         os.chown(root, uid, gid)
new file mode 100644
--- /dev/null
+++ b/taskcluster/docker/bootstrap/shell-helper.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This script provides helper functions for shell-based recipes. Simply source
+# it from a recipe to make the functions below available.
+
+# Record each package as persisted (kept in the final image) and install it.
+install_system_packages () {
+    for ent in "$@"; do
+        printf '%s\n' "${ent}" >> system-packages.persisted
+    done
+    apt-get install -y "$@"
+}
+
+# Record each package as temporary (pruned after all recipes run) and install.
+install_temporary_system_packages() {
+    for ent in "$@"; do
+        printf '%s\n' "${ent}" >> system-packages.temporary
+    done
+    apt-get install -y "$@"
+}
--- a/taskcluster/docker/lint/image.yml
+++ b/taskcluster/docker/lint/image.yml
@@ -3,34 +3,37 @@ flavor: ubuntu1604
 volumes:
   - /builds/worker/.cache
   - /builds/worker/checkouts
 
 system-packages:
   - curl
   - locales
   - git
+  # Needed for building psutil in some tasks.
+  - make
   - python
   - python-pip
-  - python3-pip
   - sudo
   - wget
   - xz-utils
 
 tooltool-archives:
   - name: 'fzf'
     digest: '9f0ef6bf44b8622bd0e4e8b0b5b5c714c0a2ce4487e6f234e7d4caac458164c521949f4d84b8296274e8bd20966f835e26f6492ba499405d38b620181e82429e'
     dest: 'fzf'
     archive-format: 'tar-gz'
 
 tooltool-manifests:
   - tools/lint/eslint/manifest.tt
   - tools/lint/eslint/eslint-plugin-mozilla/manifest.tt
+  - taskcluster/docker/recipes/python.tt
 
 recipes:
   - install-node.sh
+  - install-python.sh
 
 extra-files:
   - tools/lint/python/flake8_requirements.txt
   - tools/lint/tox/tox_requirements.txt
 
 local-recipes:
   - system-setup.sh
new file mode 100755
--- /dev/null
+++ b/taskcluster/docker/recipes/install-python.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+set -ex
+
+. shell-helper.sh
+
+# Runtime libraries the built Pythons link against. Install them explicitly so
+# apt marks them as manually installed and autoremove does not prune them.
+install_system_packages \
+    libbz2-1.0 \
+    libc6 \
+    libdb5.3 \
+    libexpat1 \
+    libffi6 \
+    libncursesw5 \
+    libreadline6 \
+    libsqlite3-0 \
+    libssl1.0.0 \
+    libtinfo5 \
+    mime-support \
+    zlib1g
+
+install_temporary_system_packages \
+    build-essential \
+    libbz2-dev \
+    libdb-dev \
+    libffi-dev \
+    libgdbm-dev \
+    liblzma-dev \
+    libncurses5-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libssl-dev \
+    zlib1g-dev
+
+# Build Python ${1} (e.g. 2.7.13) into /mozilla; add setuptools/pip/virtualenv.
+install_python () {
+    local full="${1}" short njobs
+    short=$(echo "${full}" | awk -F. '{print $1"."$2}')
+    njobs=$(grep -c ^processor /proc/cpuinfo)
+    cd "downloads/Python-${full}/Python-${full}"
+    ./configure --prefix=/mozilla --without-ensurepip
+    make -j"${njobs}"
+    # We don't need the test modules.
+    rm -rf Lib/test
+    make -j"${njobs}" install
+    # Don't need static library.
+    find /mozilla/lib -name "libpython${short}.a" -exec rm {} \;
+    # Don't need .pyo files.
+    find "/mozilla/lib/python${short}" -type f -name '*.pyo' -exec rm {} \;
+    cd ../../..
+    (cd downloads/setuptools-36.2.7/setuptools-36.2.7 && "/mozilla/bin/python${short}" setup.py install)
+    (cd downloads/pip-9.0.1/pip-9.0.1 && "/mozilla/bin/python${short}" setup.py install)
+    "/mozilla/bin/pip${short}" install downloads/virtualenv-15.1.0-py2.py3-none-any.whl
+}
+
+install_python 2.7.13
+install_python 3.6.2
new file mode 100644
--- /dev/null
+++ b/taskcluster/docker/recipes/python.tt
@@ -0,0 +1,36 @@
+[
+  {
+    "size": 17076672,
+    "digest": "52f0706a67e06b534f1c734c01f0f99ce12070c470d034b556d8665b846d2bc32964c096c8646fc2da9528336d7552500e598a0f0c46bff092dbe803aef2068c",
+    "algorithm": "sha512",
+    "filename": "Python-2.7.13.tar.gz",
+    "unpack": true
+  },
+  {
+    "algorithm": "sha512",
+    "digest": "61fd9e5d2632daa66c14f4fdf2e60798e5783f879088fcf7c087171bad090deb5ffcda29820ba638b03e7547bec9909aa6a2df7588160523f19b6f17f6fd5f7c",
+    "filename": "Python-3.6.2.tar.gz",
+    "size": 22580749,
+    "unpack": true
+  },
+  {
+    "algorithm": "sha512",
+    "digest": "9563879c1ed3be1153b6e06acef2af8c3598b3a5708b738bd3d90376f6e4c68c5a2dabbdda0ba3fb9dfe22d47466ec3f01471925c2abf50a7b4cf539269b0762",
+    "filename": "setuptools-36.2.7.zip",
+    "size": 716382,
+    "unpack": true
+  },
+  {
+    "algorithm": "sha512",
+    "digest": "ee59efb4b009ff6543b7afdea99b9cbbee1981ecc03af586acda76674024d3b66dab23049e68f3da9448734984619fc1eaba6e965c9dd3d731973376c8a42e25",
+    "size": 1197370,
+    "filename": "pip-9.0.1.tar.gz",
+    "unpack": true
+  },
+  {
+    "algorithm": "sha512",
+    "digest": "9988af801d9ad15c3f9831489ee9b49b54388e8349be201e7f7db3f2f1e59d033d3117f12e2f1909d65f052c5f1eacd87a894c6f7f703d770add3a0179e95863",
+    "size": 1820727,
+    "filename": "virtualenv-15.1.0-py2.py3-none-any.whl"
+  }
+]
--- a/taskcluster/docker/recipes/run-task
+++ b/taskcluster/docker/recipes/run-task
@@ -476,16 +476,24 @@ def main(args):
         # container. We do not bother calling setrlimit() because containers have
         # their own limits.
         print_line(b'setup', b'running as %s:%s\n' % (args.user, args.group))
         os.setgroups(gids)
         os.umask(022)
         os.setresgid(gid, gid, gid)
         os.setresuid(uid, uid, uid)
 
+    # If a /mozilla/bin directory exists, ensure it is on PATH.
+    if os.path.exists('/mozilla/bin'):
+        paths = os.environ['PATH'].split(os.path.pathsep)
+        if '/mozilla/bin' not in paths:
+            paths.insert(0, '/mozilla/bin')
+
+        os.environ['PATH'] = os.path.pathsep.join(paths)
+
     # Checkout the repository, setting the GECKO_HEAD_REV to the current
     # revision hash. Revision hashes have priority over symbolic revisions. We
     # disallow running tasks with symbolic revisions unless they have been
     # resolved by a checkout.
     if args.vcs_checkout:
         base_repo = os.environ.get('GECKO_BASE_REPOSITORY')
         # Some callers set the base repository to mozilla-central for historical
         # reasons. Switch to mozilla-unified because robustcheckout works best