docker: use user-defined network for mozreview cluster (bug 1290783); r?glob
Docker 1.12 requires things like links to be specified at container
create time instead of start time. If we were to retain using links,
we would have to drastically overhaul the container startup sequence.
And this would undo a lot of optimizations around our startup sequence,
which currently creates containers as early as possible to cut down
on start time.
Fortunately, there's a better way. Newer versions of Docker support
"user-defined networks." These are essentially named, isolated networks.
When using user-defined networks, containers that join networks can
specify "aliases." These magically get turned into resolvable
hostnames inside all containers in that network. This is a really
cool feature because it means hostnames and port numbers can be static
within the network. Before, you would have to sniff environment
variables at container start time to resolve IPs/hostnames and ports.
With constant aliases in user-defined networks, you can hardcode both
the hostname and the port.
This commit switches the MozReview cluster to use user-defined networks.
The network name is randomly generated. Containers in the network have
aliases giving each a sane hostname.
A lot of code around configuring containers at startup for dynamic
hostnames and ports has been removed since these are now static
properties. There is certainly follow-up work to remove more code
around container startup that was needed for the dynamic environment.
But I'll defer that to another day.
I've tested this commit on Docker 1.11 and 1.12 and it appears to
"just work" on both. I would have liked to split the changes to
docker.py into a smaller commit. But Docker 1.11 didn't like
moving host config to container create time without also changing the
networking config.
MozReview-Commit-ID: 18dH4CSPj5n
--- a/ansible/roles/docker-hg-reviewboard/defaults/main.yml
+++ b/ansible/roles/docker-hg-reviewboard/defaults/main.yml
@@ -1,5 +1,5 @@
---
domain: localhost
-ldap_uri: ldap://localhost/
+ldap_uri: ldap://ldap:389/
bind_dn: cn=admin,dc=mozilla
bind_pw: password
deleted file mode 100755
--- a/ansible/roles/docker-hg-reviewboard/files/entrypoint.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/python -u
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-import os
-import subprocess
-import sys
-
-if 'LDAP_PORT_389_TCP_ADDR' not in os.environ:
- print('error: contained invoked without link to an ldap contaer')
- sys.exit(1)
-
-ldap_url = 'ldap://%s:%s/' % (os.environ['LDAP_PORT_389_TCP_ADDR'],
- os.environ['LDAP_PORT_389_TCP_PORT'])
-
-os.environ['DOCKER_ENTRYPOINT'] = '1'
-
-subprocess.check_call([
- '/usr/bin/python', '-u',
- '/usr/bin/ansible-playbook', 'docker-hgrb.yml',
- '-c', 'local',
- '-t', 'docker-startup',
- '-e', 'ldap_uri=%s' % ldap_url,
- ],
- cwd='/vct/ansible')
-
-del os.environ['DOCKER_ENTRYPOINT']
-
-os.execl(sys.argv[1], *sys.argv[1:])
--- a/ansible/roles/docker-hg-reviewboard/files/supervisor-docker.conf
+++ b/ansible/roles/docker-hg-reviewboard/files/supervisor-docker.conf
@@ -1,12 +1,18 @@
[program:rsyslog]
command = /usr/sbin/rsyslogd -n
autorestart = true
redirect_stderr = true
+# We need to run nslcd or system integration with LDAP doesn't work.
+[program:nslcd]
+command = /usr/sbin/nslcd -d
+autorestart = true
+redirect_stderr = true
+
[program:sshd]
command = /usr/sbin/sshd -D
autorestart = true
[program:httpd]
command = /usr/sbin/httpd -DFOREGROUND
autorestart = true
--- a/ansible/roles/docker-hg-reviewboard/tasks/main.yml
+++ b/ansible/roles/docker-hg-reviewboard/tasks/main.yml
@@ -5,17 +5,16 @@
owner=root
group=root
mode=0644
- name: install Docker support scripts
copy: src={{ item }} dest=/{{ item }} mode=0755
with_items:
- create-repo
- - entrypoint.py
- kill-wsgi-procs
- refresh
- set-strip-users
- set-urls
- name: Install psutil
pip: name=psutil
@@ -26,9 +25,8 @@
- { section: reviewboard, option: password, value: mrpassword }
- name: add LDAP settings file
template: src=ldap.json.j2
dest=/etc/mercurial/ldap.json
owner=root
group=root
mode=0644
- tags: docker-startup
--- a/ansible/roles/docker-rbweb/files/entrypoint.py
+++ b/ansible/roles/docker-rbweb/files/entrypoint.py
@@ -3,40 +3,23 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import sys
-if 'BMOWEB_PORT_80_TCP_ADDR' not in os.environ:
- print('error: container invoked without link to a bmoweb container')
- sys.exit(1)
-
-if 'PULSE_PORT_5672_TCP_ADDR' not in os.environ:
- print('error: container invoked without link to a pulse container')
- sys.exit(1)
-
-if 'AUTOLAND_PORT_80_TCP_ADDR' not in os.environ:
- print('error: container invoked without link to an autoland container')
- sys.exit(1)
-
execfile('/venv/bin/activate_this.py', dict(__file__='/venv/bin/activate_this.py'))
sys.path.insert(0, '/reviewboard/conf')
os.environ['DJANGO_SETTINGS_MODULE'] = 'reviewboard.settings'
-bugzilla_url = 'http://%s:%s' % (os.environ['BMOWEB_PORT_80_TCP_ADDR'],
- os.environ['BMOWEB_PORT_80_TCP_PORT'])
-
-autoland_url = 'http://%s:%s' % (os.environ['AUTOLAND_PORT_80_TCP_ADDR'],
- os.environ['AUTOLAND_PORT_80_TCP_PORT'])
-
-ldap_url = 'ldap://%s:%s' % (os.environ['LDAP_PORT_389_TCP_ADDR'],
- os.environ['LDAP_PORT_389_TCP_PORT'])
+bugzilla_url = 'http://bmoweb'
+autoland_url = 'http://autoland'
+ldap_url = 'ldap://ldap:389'
# siteconfig takes priority over settings_local.py. Ensure siteconfig
# is up to date.
#
# This code mimics what is done in
# reviewboard.admin.management.sites.migrate_settings(). Its existence is
# unfortunate. If we could guarantee that settings_local.py never changes,
# we wouldn't need this.
@@ -56,18 +39,18 @@ sc.set('logging_directory', '/reviewboar
# reset this to Bugzilla's public IP, which is not available at this point.
# It is later set via set-site-url by vcttesting.mozreview.MozReview.
sc.set('auth_bz_xmlrpc_url', '%s/xmlrpc.cgi' % bugzilla_url)
sc.save()
# Define MozReview settings.
settings = {}
settings['enabled'] = True
-settings['pulse_host'] = os.environ['PULSE_PORT_5672_TCP_ADDR']
-settings['pulse_port'] = int(os.environ['PULSE_PORT_5672_TCP_PORT'])
+settings['pulse_host'] = 'pulse'
+settings['pulse_port'] = 5672
settings['pulse_user'] = 'guest'
settings['pulse_password'] = 'guest'
settings['pulse_ssl'] = False
settings['autoland_try_ui_enabled'] = True
settings['autoland_url'] = autoland_url
settings['autoland_user'] = 'autoland'
settings['autoland_password'] = 'autoland'
settings['autoland_testing'] = True
--- a/ansible/roles/openssh-lpk/tasks/main.yml
+++ b/ansible/roles/openssh-lpk/tasks/main.yml
@@ -1,15 +1,9 @@
---
-# This is to support executing in Docker.
-- name: Find LDAP URI
- set_fact: ldap_uri=ldap://{{ ansible_env.LDAP_PORT_389_TCP_ADDR }}:{{ ansible_env.LDAP_PORT_389_TCP_PORT }}/
- when: ansible_env.LDAP_PORT_389_TCP_ADDR is defined
- tags: docker-startup
-
# We don't use the yum module here because it is too slow.
- name: Regular OpenSSH cannot be installed
command: /usr/bin/yum remove -y openssh openssh-clients openssh-server
- name: Install packages related to LDAP auth
command: /usr/bin/yum install -y authconfig nss-pam-ldapd openldap-clients pam_ldap
- name: create directory for LDAP certificates
@@ -39,14 +33,12 @@
- { path: openssh-lpk-server-5.4p1-1.x86_64.rpm, sha256: 41f59067d9d41fe04f27c0702b14ed9bced00203cab1e9af2be6e3e7299ef4ee }
- name: Install RPMs related to LDAP auth
command: /usr/bin/yum localinstall -y /var/tmp/openssh-lpk-5.4p1-1.x86_64.rpm /var/tmp/openssh-lpk-clients-5.4p1-1.x86_64.rpm /var/tmp/openssh-lpk-server-5.4p1-1.x86_64.rpm
- name: Configure system authentication settings
template: src=nslcd.conf.j2 dest=/etc/nslcd.conf
notify: run authconfig
- tags: docker-startup
- name: Configure sshd
template: src=sshd_config.j2 dest=/etc/ssh/sshd_config
notify: restart sshd
- tags: docker-startup
--- a/testing/vcttesting/docker.py
+++ b/testing/vcttesting/docker.py
@@ -1011,78 +1011,113 @@ class Docker(object):
web_image = images['bmoweb']
pulse_image = images['pulse']
rbweb_image = images['rbweb']
hgweb_image = images['hgweb']
treestatus_image = images['treestatus']
containers = self.state['containers'].setdefault(cluster, [])
+ network_name = 'mozreview-%s' % uuid.uuid4()
+ self.client.create_network(network_name, driver='bridge')
+
with limited_threadpoolexecutor(10, max_workers) as e:
if start_pulse:
+ pulse_host_config = self.client.create_host_config(
+ port_bindings={5672: pulse_port})
f_pulse_create = e.submit(
self.client.create_container,
pulse_image,
+ host_config=pulse_host_config,
+ networking_config=self.network_config(network_name, 'pulse'),
labels=['pulse'])
bmo_url = 'http://%s:%s/' % (self.docker_hostname, http_port)
+ bmoweb_host_config = self.client.create_host_config(
+ port_bindings={80: http_port})
f_web_create = e.submit(
self.client.create_container,
web_image,
environment={'BMO_URL': bmo_url},
+ host_config=bmoweb_host_config,
+ networking_config=self.network_config(network_name, 'bmoweb'),
labels=['bmoweb'])
if start_rbweb:
+ rbweb_host_config = self.client.create_host_config(
+ port_bindings={80: rbweb_port})
f_rbweb_create = e.submit(
self.client.create_container,
rbweb_image,
command=['/run'],
entrypoint=['/entrypoint.py'],
+ host_config=rbweb_host_config,
+ networking_config=self.network_config(network_name, 'rbweb'),
ports=[80],
labels=['rbweb'])
if start_ldap:
+ ldap_host_config = self.client.create_host_config(
+ port_bindings={389: ldap_port})
f_ldap_create = e.submit(
self.client.create_container,
ldap_image,
+ host_config=ldap_host_config,
+ networking_config=self.network_config(network_name, 'ldap'),
labels=['ldap'])
if start_hgrb:
+ hgrb_host_config = self.client.create_host_config(
+ port_bindings={22: ssh_port, 80: hg_port})
f_hgrb_create = e.submit(
self.client.create_container,
hgrb_image,
ports=[22, 80],
- entrypoint=['/entrypoint.py'],
- command=['/usr/bin/supervisord', '-n'])
+ command=['/usr/bin/supervisord', '-n'],
+ host_config=hgrb_host_config,
+ networking_config=self.network_config(network_name, 'hgrb'))
if start_hgweb:
+ hgweb_host_config = self.client.create_host_config(
+ port_bindings={80: hgweb_port})
f_hgweb_create = e.submit(
self.client.create_container,
hgweb_image,
ports=[80],
entrypoint=['/entrypoint-solo'],
command=['/usr/bin/supervisord', '-n'],
+ host_config=hgweb_host_config,
+ networking_config=self.network_config(network_name, 'hgweb'),
labels=['hgweb'])
if start_autoland:
f_autolanddb_create = e.submit(
self.client.create_container,
autolanddb_image,
- labels=['autolanddb'])
+ labels=['autolanddb'],
+ networking_config=self.network_config(network_name, 'autolanddb'))
+ autoland_host_config = self.client.create_host_config(
+ port_bindings={80: autoland_port})
f_autoland_create = e.submit(
self.client.create_container,
autoland_image,
+ host_config=autoland_host_config,
+ networking_config=self.network_config(network_name, 'autoland'),
labels=['autolandweb'])
if start_treestatus:
+ treestatus_host_config = self.client.create_host_config(
+ port_bindings={80: treestatus_port})
f_treestatus_create = e.submit(
self.client.create_container,
treestatus_image,
+ host_config=treestatus_host_config,
+ networking_config=self.network_config(network_name, 'treestatus'),
labels=['treestatus'])
if start_autoland:
autolanddb_id = f_autolanddb_create.result()['Id']
containers.append(autolanddb_id)
f_start_autolanddb = e.submit(
self.client.start,
autolanddb_id)
@@ -1090,26 +1125,24 @@ class Docker(object):
# RabbitMQ takes a while to start up. Start it before other
# containers. (We probably could have a callback-driven mechanism
# here to ensure no time is lost. But that is more complex.)
if start_pulse:
pulse_id = f_pulse_create.result()['Id']
containers.append(pulse_id)
f_start_pulse = e.submit(
self.client.start,
- pulse_id,
- port_bindings={5672: pulse_port})
+ pulse_id)
if start_ldap:
ldap_id = f_ldap_create.result()['Id']
containers.append(ldap_id)
f_start_ldap = e.submit(
self.client.start,
- ldap_id,
- port_bindings={389: ldap_port})
+ ldap_id)
web_id = f_web_create.result()['Id']
containers.append(web_id)
if start_autoland:
f_start_autolanddb.result()
autolanddb_state = self.client.inspect_container(autolanddb_id)
autoland_id = f_autoland_create.result()['Id']
@@ -1132,70 +1165,54 @@ class Docker(object):
treestatus_id = f_treestatus_create.result()['Id']
containers.append(treestatus_id)
# At this point, all containers have been created.
self.save_state()
f_start_web = e.submit(
self.client.start,
- web_id,
- port_bindings={80: http_port})
+ web_id)
f_start_web.result()
web_state = self.client.inspect_container(web_id)
if start_pulse:
f_start_pulse.result()
pulse_state = self.client.inspect_container(pulse_id)
if start_ldap:
f_start_ldap.result()
ldap_state = self.client.inspect_container(ldap_id)
# TODO: Use futures for hgrb, hgweb and treestatus
if start_hgrb:
- self.client.start(hgrb_id,
- links=[(ldap_state['Name'], 'ldap')],
- port_bindings={22: ssh_port, 80: hg_port})
+ self.client.start(hgrb_id)
hgrb_state = self.client.inspect_container(hgrb_id)
if start_hgweb:
- self.client.start(hgweb_id,
- port_bindings={80: hgweb_port})
+ self.client.start(hgweb_id)
hgweb_state = self.client.inspect_container(hgweb_id)
if start_treestatus:
- self.client.start(treestatus_id,
- port_bindings={80: treestatus_port})
+ self.client.start(treestatus_id)
treestatus_state = self.client.inspect_container(treestatus_id)
if start_autoland:
assert start_hgrb
assert start_treestatus
f_start_autoland = e.submit(
self.client.start,
- autoland_id,
- links=[(autolanddb_state['Name'], 'autolanddb'),
- (web_state['Name'], 'bmoweb'),
- (hgrb_state['Name'], 'hgrb'),
- (treestatus_state['Name'], 'treestatus')],
- port_bindings={80: autoland_port})
+ autoland_id)
f_start_autoland.result()
autoland_state = self.client.inspect_container(autoland_id)
if start_rbweb:
assert start_autoland
self.client.start(
- rbweb_id,
- links=[(web_state['Name'], 'bmoweb'),
- (pulse_state['Name'], 'pulse'),
- (hgrb_state['Name'], 'hgrb'),
- (autoland_state['Name'], 'autoland'),
- (ldap_state['Name'], 'ldap')],
- port_bindings={80: rbweb_port})
+ rbweb_id)
rbweb_state = self.client.inspect_container(rbweb_id)
bmoweb_hostname, bmoweb_hostport = \
self._get_host_hostname_port(web_state, '80/tcp')
bmo_url = 'http://%s:%d/' % (bmoweb_hostname, bmoweb_hostport)
if start_pulse:
rabbit_hostname, rabbit_hostport = \
@@ -1323,21 +1340,17 @@ class Docker(object):
networks.add(network['NetworkID'])
count += 1
e.submit(self.client.remove_container, container, force=True,
v=True)
# There should only be 1, so don't use a ThreadPoolExecutor.
for network in networks:
- # TODO remove this lookup and check once using user-defined networks
- # everywhere.
- n = self.client.inspect_network(network)
- if n['Name'] != 'bridge':
- self.client.remove_network(network)
+ self.client.remove_network(network)
print('stopped %d containers' % count)
try:
del self.state['containers'][cluster]
self.save_state()
except KeyError:
pass