Bug 1275409: remove taskcluster_graph.image_builder; r?wcosta
author: Dustin J. Mitchell <dustin@mozilla.com>
date: Mon, 06 Jun 2016 18:55:10 +0000
changeset: 376702 f0ebd4b74c75d6b62327df85a4140112beaa393e
parent: 376701 29c48dfa679e325b8eaea6d85187c9189eeedbc8
child: 376703 f38a5f406619d7356124d9e75a8ef5b94ca310ea
push id: 20643
push user: dmitchell@mozilla.com
push date: Wed, 08 Jun 2016 13:31:04 +0000
reviewers: wcosta
bugs: 1275409
milestone: 50.0a1

Bug 1275409: remove taskcluster_graph.image_builder; r?wcosta

MozReview-Commit-ID: 21HBtcDVPXC
taskcluster/mach_commands.py
taskcluster/taskgraph/docker.py
taskcluster/taskgraph/kind/docker_image.py
taskcluster/taskgraph/kind/legacy.py
taskcluster/taskgraph/test/test_kind_docker_image.py
taskcluster/taskgraph/test/test_util_docker.py
taskcluster/taskgraph/util/docker.py
testing/taskcluster/taskcluster_graph/image_builder.py
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -213,29 +213,22 @@ class LoadImage(object):
         description="Load a pre-built Docker image")
     @CommandArgument('--task-id',
         help="Load the image at public/image.tar in this task, rather than "
              "searching the index")
     @CommandArgument('image_name', nargs='?',
         help="Load the image of this name based on the current contents of the tree "
              "(as built for mozilla-central or mozilla-inbound)")
     def load_image(self, image_name, task_id):
-        from taskcluster_graph.image_builder import (
-            task_id_for_image,
-            docker_load_from_url
-        )
-
+        from taskgraph.docker import load_image_by_name, load_image_by_task_id
         if not image_name and not task_id:
             print("Specify either IMAGE-NAME or TASK-ID")
             sys.exit(1)
-
-        if not task_id:
-            task_id = task_id_for_image({}, 'mozilla-inbound', image_name, create=False)
-            if not task_id:
-                print("No task found in the TaskCluster index for", image_name)
+        try:
+            if task_id:
+                ok = load_image_by_task_id(task_id)
+            else:
+                ok = load_image_by_name(image_name)
+            if not ok:
                 sys.exit(1)
-
-        print("Task ID:", task_id)
-
-        ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
-        image_name = docker_load_from_url(ARTIFACT_URL.format(task_id, 'public/image.tar'))
-
-        print("Loaded image is named", image_name)
+        except Exception:
+            traceback.print_exc()
+            sys.exit(1)
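For readers who want to drive the new code path outside of mach, a minimal sketch; the checkout path and the image name 'desktop-test' are placeholders, the taskcluster/ directory must be on sys.path, and a local docker daemon must be running:

    import sys
    sys.path.insert(0, '/path/to/gecko/taskcluster')  # hypothetical checkout location

    from taskgraph.docker import load_image_by_name, load_image_by_task_id

    # Resolve a context under testing/docker/ via the mozilla-central index and
    # load the resulting image.tar into the local docker daemon.
    load_image_by_name('desktop-test')

    # Or, when the image-build task ID is already known, skip the index lookup.
    load_image_by_task_id('abc123ExampleTaskId')  # hypothetical task ID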
new file mode 100644
--- /dev/null
+++ b/taskcluster/taskgraph/docker.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import json
+import os
+import subprocess
+import tarfile
+import urllib2
+
+from taskgraph.util import docker
+
+GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
+INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
+ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+
+
+def load_image_by_name(image_name):
+    context_path = os.path.join(GECKO, 'testing', 'docker', image_name)
+    context_hash = docker.generate_context_hash(context_path)
+
+    image_index_url = INDEX_URL.format('mozilla-central', image_name, context_hash)
+    print("Fetching", image_index_url)
+    task = json.load(urllib2.urlopen(image_index_url))
+
+    return load_image_by_task_id(task['taskId'])
+
+
+def load_image_by_task_id(task_id):
+    # because we need to read this file twice (and one read is not all the way
+    # through), it is difficult to stream it.  So we download to disk and then
+    # read it back.
+    filename = 'temp-docker-image.tar'
+
+    artifact_url = ARTIFACT_URL.format(task_id, 'public/image.tar')
+    print("Downloading", artifact_url)
+    subprocess.check_call(['curl', '-#', '-L', '-o', filename, artifact_url])
+
+    print("Determining image name")
+    tf = tarfile.open(filename)
+    repositories = json.load(tf.extractfile('repositories'))
+    name = repositories.keys()[0]
+    tag = repositories[name].keys()[0]
+    name = '{}:{}'.format(name, tag)
+    print("Image name:", name)
+
+    print("Loading image into docker")
+    try:
+        subprocess.check_call(['docker', 'load', '-i', filename])
+    except subprocess.CalledProcessError:
+        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
+        print("*** problem and running `docker load < {}`.".format(filename))
+        raise
+
+    print("Deleting temporary file")
+    os.unlink(filename)
+
+    print("The requested docker image is now available as", name)
+    print("Try: docker run -ti --rm {} bash".format(name))
--- a/taskcluster/taskgraph/kind/docker_image.py
+++ b/taskcluster/taskgraph/kind/docker_image.py
@@ -9,17 +9,20 @@ import json
 import os
 import urllib2
 import hashlib
 import tarfile
 import time
 
 from . import base
 from ..types import Task
-from taskgraph.util.docker import docker_image
+from taskgraph.util.docker import (
+    docker_image,
+    generate_context_hash
+)
 from taskgraph.util.templates import Templates
 from taskgraph.util.time import (
     json_time_from_now,
     current_json_time,
 )
 
 logger = logging.getLogger(__name__)
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
@@ -53,17 +56,17 @@ class DockerImageKind(base.Kind):
             'source': '{repo}file/{rev}/testing/taskcluster/tasks/image.yml'
                     .format(repo=params['head_repository'], rev=params['head_rev']),
         }
 
         tasks = []
         templates = Templates(self.path)
         for image_name in self.config['images']:
             context_path = os.path.join('testing', 'docker', image_name)
-            context_hash = self.generate_context_hash(context_path)
+            context_hash = generate_context_hash(context_path)
 
             image_parameters = dict(parameters)
             image_parameters['context_hash'] = context_hash
             image_parameters['context_path'] = context_path
             image_parameters['artifact_path'] = 'public/image.tar'
             image_parameters['image_name'] = image_name
 
             image_artifact_path = "public/decision_task/image_contexts/{}/context.tar.gz".format(image_name)
@@ -127,34 +130,8 @@ class DockerImageKind(base.Kind):
     def create_context_tar(self, context_dir, destination, image_name):
         'Creates a tar file of a particular context directory.'
         destination = os.path.abspath(destination)
         if not os.path.exists(os.path.dirname(destination)):
             os.makedirs(os.path.dirname(destination))
 
         with tarfile.open(destination, 'w:gz') as tar:
             tar.add(context_dir, arcname=image_name)
-
-    def generate_context_hash(self, image_path):
-        '''Generates a sha256 hash for context directory used to build an image.
-
-        Contents of the directory are sorted alphabetically, contents of each file is hashed,
-        and then a hash is created for both the file hashes as well as their paths.
-
-        This ensures that hashs are consistent and also change based on if file locations
-        within the context directory change.
-        '''
-        context_hash = hashlib.sha256()
-        files = []
-
-        for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
-            for filename in filenames:
-                files.append(os.path.join(dirpath, filename))
-
-        for filename in sorted(files):
-            relative_filename = filename.replace(GECKO, '')
-            with open(filename, 'rb') as f:
-                file_hash = hashlib.sha256()
-                data = f.read()
-                file_hash.update(data)
-                context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
-
-        return context_hash.hexdigest()
--- a/taskcluster/taskgraph/kind/legacy.py
+++ b/taskcluster/taskgraph/kind/legacy.py
@@ -53,20 +53,16 @@ TREEHERDER_ROUTES = {
 # time after which a try build's results will expire
 TRY_EXPIRATION = "14 days"
 
 logger = logging.getLogger(__name__)
 
 def mklabel():
     return TASKID_PLACEHOLDER.format(slugid())
 
-# monkey-patch mklabel into image_builder, as well
-from taskcluster_graph import image_builder
-image_builder.mklabel = mklabel
-
 def set_expiration(task, timestamp):
     task_def = task['task']
     task_def['expires'] = timestamp
     if task_def.get('deadline', timestamp) > timestamp:
         task_def['deadline'] = timestamp
 
     try:
         artifacts = task_def['payload']['artifacts']
--- a/taskcluster/taskgraph/test/test_kind_docker_image.py
+++ b/taskcluster/taskgraph/test/test_kind_docker_image.py
@@ -29,26 +29,10 @@ class TestDockerImageKind(unittest.TestC
 
     def test_create_context_tar(self):
         image_dir = os.path.join(docker_image.GECKO, 'testing', 'docker', 'image_builder')
         tarball = tempfile.mkstemp()[1]
         self.kind.create_context_tar(image_dir, tarball, 'image_builder')
         self.failUnless(os.path.exists(tarball))
         os.unlink(tarball)
 
-    def test_generate_context_hash(self):
-        tmpdir = tempfile.mkdtemp()
-        old_GECKO = docker_image.GECKO
-        docker_image.GECKO = tmpdir
-        try:
-            os.makedirs(os.path.join(tmpdir, 'docker', 'my-image'))
-            with open(os.path.join(tmpdir, 'docker', 'my-image', 'Dockerfile'), "w") as f:
-                f.write("FROM node\nADD a-file\n")
-            with open(os.path.join(tmpdir, 'docker', 'my-image', 'a-file'), "w") as f:
-                f.write("data\n")
-            self.assertEqual(self.kind.generate_context_hash('docker/my-image'),
-                    '781143fcc6cc72c9024b058665265cb6bae3fb8031cad7227dd169ffbfced434')
-        finally:
-            docker_image.GECKO = old_GECKO
-            shutil.rmtree(tmpdir)
-
 if __name__ == '__main__':
     main()
--- a/taskcluster/taskgraph/test/test_util_docker.py
+++ b/taskcluster/taskgraph/test/test_util_docker.py
@@ -1,27 +1,46 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import os
+import shutil
+import tempfile
 import unittest
 
-from ..util.docker import docker_image, DOCKER_ROOT
+from ..util import docker
 from mozunit import main, MockedOpen
 
 
-class TestDockerImage(unittest.TestCase):
+class TestDocker(unittest.TestCase):
+
+    def test_generate_context_hash(self):
+        tmpdir = tempfile.mkdtemp()
+        old_GECKO = docker.GECKO
+        docker.GECKO = tmpdir
+        try:
+            os.makedirs(os.path.join(tmpdir, 'docker', 'my-image'))
+            with open(os.path.join(tmpdir, 'docker', 'my-image', 'Dockerfile'), "w") as f:
+                f.write("FROM node\nADD a-file\n")
+            with open(os.path.join(tmpdir, 'docker', 'my-image', 'a-file'), "w") as f:
+                f.write("data\n")
+            self.assertEqual(docker.generate_context_hash('docker/my-image'),
+                    '781143fcc6cc72c9024b058665265cb6bae3fb8031cad7227dd169ffbfced434')
+        finally:
+            docker.GECKO = old_GECKO
+            shutil.rmtree(tmpdir)
 
     def test_docker_image_explicit_registry(self):
         files = {}
-        files["{}/myimage/REGISTRY".format(DOCKER_ROOT)] = "cool-images"
-        files["{}/myimage/VERSION".format(DOCKER_ROOT)] = "1.2.3"
+        files["{}/myimage/REGISTRY".format(docker.DOCKER_ROOT)] = "cool-images"
+        files["{}/myimage/VERSION".format(docker.DOCKER_ROOT)] = "1.2.3"
         with MockedOpen(files):
-            self.assertEqual(docker_image('myimage'), "cool-images/myimage:1.2.3")
+            self.assertEqual(docker.docker_image('myimage'), "cool-images/myimage:1.2.3")
 
     def test_docker_image_default_registry(self):
         files = {}
-        files["{}/REGISTRY".format(DOCKER_ROOT)] = "mozilla"
-        files["{}/myimage/VERSION".format(DOCKER_ROOT)] = "1.2.3"
+        files["{}/REGISTRY".format(docker.DOCKER_ROOT)] = "mozilla"
+        files["{}/myimage/VERSION".format(docker.DOCKER_ROOT)] = "1.2.3"
         with MockedOpen(files):
-            self.assertEqual(docker_image('myimage'), "mozilla/myimage:1.2.3")
+            self.assertEqual(docker.docker_image('myimage'), "mozilla/myimage:1.2.3")
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,26 +1,54 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import hashlib
 import os
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
+ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 def docker_image(name):
     '''Determine the docker image name, including repository and tag, from an
     in-tree docker file.'''
     try:
         with open(os.path.join(DOCKER_ROOT, name, 'REGISTRY')) as f:
             registry = f.read().strip()
     except IOError:
         with open(os.path.join(DOCKER_ROOT, 'REGISTRY')) as f:
             registry = f.read().strip()
 
     with open(os.path.join(DOCKER_ROOT, name, 'VERSION')) as f:
         version = f.read().strip()
 
     return '{}/{}:{}'.format(registry, name, version)
 
+
+def generate_context_hash(image_path):
+    '''Generates a sha256 hash for the context directory used to build an image.
+
+    Contents of the directory are sorted alphabetically, the contents of each file are hashed,
+    and then a hash is created over both the file hashes and their relative paths.
+
+    This ensures that hashes are consistent and also change if file locations
+    within the context directory change.
+    '''
+    context_hash = hashlib.sha256()
+    files = []
+
+    for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
+        for filename in filenames:
+            files.append(os.path.join(dirpath, filename))
+
+    for filename in sorted(files):
+        relative_filename = filename.replace(GECKO, '')
+        with open(filename, 'rb') as f:
+            file_hash = hashlib.sha256()
+            data = f.read()
+            file_hash.update(data)
+            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
+
+    return context_hash.hexdigest()
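A small self-contained check of the property the docstring claims (the hash covers relative file paths, not just file contents); the scratch-directory layout below is purely illustrative:

    import os
    import shutil
    import tempfile

    from taskgraph.util import docker

    tmpdir = tempfile.mkdtemp()
    old_gecko = docker.GECKO
    docker.GECKO = tmpdir  # point the hash at a scratch tree, as the unit test does
    try:
        ctx = os.path.join(tmpdir, 'docker', 'my-image')
        os.makedirs(ctx)
        with open(os.path.join(ctx, 'a-file'), 'w') as f:
            f.write('data\n')
        before = docker.generate_context_hash('docker/my-image')

        # Move the same content to a different relative path; the hash changes.
        os.makedirs(os.path.join(ctx, 'subdir'))
        shutil.move(os.path.join(ctx, 'a-file'), os.path.join(ctx, 'subdir', 'a-file'))
        after = docker.generate_context_hash('docker/my-image')
        assert before != after
    finally:
        docker.GECKO = old_gecko
        shutil.rmtree(tmpdir)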
deleted file mode 100644
--- a/testing/taskcluster/taskcluster_graph/image_builder.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import hashlib
-import json
-import os
-import subprocess
-import tarfile
-import urllib2
-
-from slugid import nice as slugid
-from taskgraph.util.templates import Templates
-
-TASKCLUSTER_ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
-IMAGE_BUILD_TASK = os.path.join(TASKCLUSTER_ROOT, 'tasks', 'image.yml')
-GECKO = os.path.realpath(os.path.join(TASKCLUSTER_ROOT, '..', '..'))
-DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
-REGISTRY = open(os.path.join(DOCKER_ROOT, 'REGISTRY')).read().strip()
-INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
-ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
-DEFINE_TASK = 'queue:define-task:aws-provisioner-v1/{}'
-
-def is_docker_registry_image(registry_path):
-    return os.path.isfile(registry_path)
-
-# make a task label; in old decision tasks, this is a regular slugid, but when called
-# from the taskgraph generator's legacy kind, this is monkey-patched to return a label
-# (`TaskLabel==..`)
-def mklabel():
-    return slugid()
-
-def task_id_for_image(seen_images, project, name, create=True):
-    if name in seen_images:
-        return seen_images[name]['taskId']
-
-    context_path = os.path.join('testing', 'docker', name)
-    context_hash = generate_context_hash(context_path)
-    task_id = get_task_id_for_namespace(project, name, context_hash)
-
-    if task_id:
-        seen_images[name] = {'taskId': task_id}
-        return task_id
-
-    if not create:
-        return None
-
-    task_id = mklabel()
-    seen_images[name] = {
-        'taskId': task_id,
-        'path': context_path,
-        'hash': context_hash
-    }
-
-    return task_id
-
-def image_artifact_exists_for_task_id(task_id, path):
-    ''' Verifies that the artifact exists for the task ID '''
-    try:
-        request = urllib2.Request(ARTIFACT_URL.format(task_id, path))
-        request.get_method = lambda : 'HEAD'
-        urllib2.urlopen(request)
-        return True
-    except urllib2.HTTPError,e:
-        return False
-
-def get_task_id_for_namespace(project, name, context_hash):
-    '''
-    Determine the Task ID for an indexed image.
-
-    As an optimization, if the context hash exists for mozilla-central, that image
-    task ID will be used.  The reasoning behind this is that eventually everything ends
-    up on mozilla-central at some point if most tasks use this as a common image
-    for a given context hash, a worker within Taskcluster does not need to contain
-    the same image per branch.
-    '''
-    for p in ['mozilla-central', project]:
-        image_index_url = INDEX_URL.format(p, name, context_hash)
-        try:
-            task = json.load(urllib2.urlopen(image_index_url))
-            # Ensure that the artifact exists for the task and hasn't expired
-            artifact_exists = image_artifact_exists_for_task_id(task['taskId'],
-                                                                'public/image.tar')
-            # Only return the task ID if the artifact exists for the indexed
-            # task.  Otherwise, continue on looking at each of the branches.  Method
-            # continues trying other branches in case mozilla-central has an expired
-            # artifact, but 'project' might not. Only return no task ID if all
-            # branches have been tried
-            if artifact_exists:
-                return task['taskId']
-        except urllib2.HTTPError:
-            pass
-
-    return None
-
-def generate_context_hash(image_path):
-    '''
-    Generates a sha256 hash for context directory used to build an image.
-
-    Contents of the directory are sorted alphabetically, contents of each file is hashed,
-    and then a hash is created for both the file hashs as well as their paths.
-
-    This ensures that hashs are consistent and also change based on if file locations
-    within the context directory change.
-    '''
-    context_hash = hashlib.sha256()
-    files = []
-
-    for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
-        for filename in filenames:
-            files.append(os.path.join(dirpath, filename))
-
-    for filename in sorted(files):
-        relative_filename = filename.replace(GECKO, '')
-        with open(filename, 'rb') as f:
-            file_hash = hashlib.sha256()
-            while True:
-                data = f.read()
-                if not data:
-                    break
-                file_hash.update(data)
-            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
-
-    return context_hash.hexdigest()
-
-def docker_load_from_url(url):
-    """Get a docker image from a `docker save` tarball at the given URL,
-    loading it into the running daemon and returning the image name."""
-
-    # because we need to read this file twice (and one read is not all the way
-    # through), it is difficult to stream it.  So we downlaod to disk and then
-    # read it back.
-    filename = 'temp-docker-image.tar'
-
-    print("Downloading {}".format(url))
-    subprocess.check_call(['curl', '-#', '-L', '-o', filename, url])
-
-    print("Determining image name")
-    tf = tarfile.open(filename)
-    repositories = json.load(tf.extractfile('repositories'))
-    name = repositories.keys()[0]
-    tag = repositories[name].keys()[0]
-    name = '{}:{}'.format(name, tag)
-    print("Image name: {}".format(name))
-
-    print("Loading image into docker")
-    try:
-        subprocess.check_call(['docker', 'load', '-i', filename])
-    except subprocess.CalledProcessError:
-        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
-        print("*** problem and running `docker load < {}`.".format(filename))
-        raise
-
-    print("Deleting temporary file")
-    os.unlink(filename)
-
-    return name
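The deleted module also provided the index fallback that preferred mozilla-central and skipped indexed tasks whose image.tar artifact had expired. For anyone who still needs that behaviour outside the task graph, a rough standalone equivalent of get_task_id_for_namespace and image_artifact_exists_for_task_id (function names kept for reference; 'mozilla-inbound' is a placeholder project):

    import json
    import urllib2

    INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
    ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'

    def image_artifact_exists_for_task_id(task_id, path):
        # HEAD request: an HTTP error means the artifact is missing or expired.
        request = urllib2.Request(ARTIFACT_URL.format(task_id, path))
        request.get_method = lambda: 'HEAD'
        try:
            urllib2.urlopen(request)
            return True
        except urllib2.HTTPError:
            return False

    def get_task_id_for_namespace(project, name, context_hash):
        # Try mozilla-central first so branches can share one image per context
        # hash, then fall back to the given project branch.
        for branch in ('mozilla-central', project):
            try:
                task = json.load(urllib2.urlopen(
                    INDEX_URL.format(branch, name, context_hash)))
            except urllib2.HTTPError:
                continue
            if image_artifact_exists_for_task_id(task['taskId'], 'public/image.tar'):
                return task['taskId']
        return None

    # e.g. get_task_id_for_namespace('mozilla-inbound', 'desktop-test', context_hash)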