Bug 1296503 - Switch config.status to unicode literals. r?ted draft
author Mike Hommey <mh+mozilla@glandium.org>
Thu, 18 Aug 2016 18:27:39 +0900
changeset 404297 2131623920e2aa38a8591696b881afe29aa0adaf
parent 404296 82c715bc29fc61dfe9a10ca97420301e1e95a697
child 404298 466b01f40988f86b84fa3d736e44e234d6fb12ea
push id 27158
push user bmo:mh+mozilla@glandium.org
push date Tue, 23 Aug 2016 05:25:26 +0000
Bug 1296503 - Switch config.status to unicode literals. r?ted
Ironically, the first thing we do with those unicode literals is convert them to byte strings, because the build backends don't like them yet.
--- a/configure.py
+++ b/configure.py
@@ -1,26 +1,28 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 from __future__ import print_function, unicode_literals
 import codecs
-import json
 import os
 import subprocess
 import sys
-from collections import Iterable
+import textwrap
 base_dir = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, os.path.join(base_dir, 'python', 'mozbuild'))
 from mozbuild.configure import ConfigureSandbox
+from mozbuild.util import (
+    indented_repr,
+    encode,
 def main(argv):
     config = {}
     sandbox = ConfigureSandbox(config, os.environ, argv)
     sandbox.run(os.path.join(os.path.dirname(__file__), 'moz.configure'))
     if sandbox._help:
@@ -57,62 +59,47 @@ def config_status(config):
     # Create config.status. Eventually, we'll want to just do the work it does
     # here, when we're able to skip configure tests/use cached results/not rely
     # on autoconf.
     print("Creating config.status", file=sys.stderr)
     encoding = 'mbcs' if sys.platform == 'win32' else 'utf-8'
     with codecs.open('config.status', 'w', encoding) as fh:
         fh.write('#!%s\n' % config['PYTHON'])
         fh.write('# coding=%s\n' % encoding)
-        # Because we're serializing as JSON but reading as python, the values
-        # for True, False and None are true, false and null, which don't exist.
-        # Define them.
-        fh.write('true, false, null = True, False, None\n')
+        fh.write('from __future__ import unicode_literals\n')
+        fh.write('from mozbuild.util import encode\n')
+        # A lot of the build backend code is currently expecting byte
+        # strings and breaks in subtle ways with unicode strings.
+        fh.write("encoding = '%s'\n" % encoding)
         for k, v in sanitized_config.iteritems():
-            fh.write('%s = ' % k)
-            json.dump(v, fh, sort_keys=True, indent=4, ensure_ascii=False)
-            fh.write('\n')
+            fh.write('%s = encode(%s, encoding)\n' % (k, indented_repr(v)))
         fh.write("__all__ = ['topobjdir', 'topsrcdir', 'defines', "
                  "'non_global_defines', 'substs', 'mozconfig']")
         if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
-            fh.write('''
-if __name__ == '__main__':
-    args = dict([(name, globals()[name]) for name in __all__])
-    from mozbuild.config_status import config_status
-    config_status(**args)
-    # Running config.status standalone uses byte literals for all the config,
-    # instead of the unicode literals we have in sanitized_config right now.
-    # Some values in sanitized_config also have more complex types, such as
-    # EnumString, which using when calling config_status would currently break
-    # the build, as well as making it inconsistent with re-running
-    # config.status. Fortunately, EnumString derives from unicode, so it's
-    # covered by converting unicode strings.
-    # Moreover, a lot of the build backend code is currently expecting byte
-    # strings and breaks in subtle ways with unicode strings.
-    def encode(v):
-        if isinstance(v, dict):
-            return {
-                encode(k): encode(val)
-                for k, val in v.iteritems()
-            }
-        if isinstance(v, str):
-            return v
-        if isinstance(v, unicode):
-            return v.encode(encoding)
-        if isinstance(v, Iterable):
-            return [encode(i) for i in v]
-        return v
+            fh.write(textwrap.dedent('''
+                if __name__ == '__main__':
+                    from mozbuild.config_status import config_status
+                    args = dict([(name, globals()[name]) for name in __all__])
+                    config_status(**args)
+            '''))
     # Other things than us are going to run this file, so we need to give it
     # executable permissions.
     os.chmod('config.status', 0o755)
     if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
         os.environ[b'WRITE_MOZINFO'] = b'1'
         from mozbuild.config_status import config_status
-        return config_status(args=[], **encode(sanitized_config))
+        # Some values in sanitized_config also have more complex types, such as
+        # EnumString, which using when calling config_status would currently
+        # break the build, as well as making it inconsistent with re-running
+        # config.status. Fortunately, EnumString derives from unicode, so it's
+        # covered by converting unicode strings.
+        # A lot of the build backend code is currently expecting byte strings
+        # and breaks in subtle ways with unicode strings.
+        return config_status(args=[], **encode(sanitized_config, encoding))
     return 0
 if __name__ == '__main__':
--- a/python/mozbuild/mozbuild/util.py
+++ b/python/mozbuild/mozbuild/util.py
@@ -19,16 +19,17 @@ import os
 import re
 import stat
 import sys
 import time
 import types
 from collections import (
+    Iterable,
 from io import (
@@ -1239,8 +1240,24 @@ def indented_repr(o, indent=4):
                 for d in recurse_indented_repr(i, level + 1):
                     yield d
                 yield ',\n'
             yield one_indent * level
             yield ']'
             yield repr(o)
     return ''.join(recurse_indented_repr(o, 0))
+def encode(obj, encoding='utf-8'):
+    '''Recursively encode unicode strings with the given encoding.'''
+    if isinstance(obj, dict):
+        return {
+            encode(k, encoding): encode(v, encoding)
+            for k, v in obj.iteritems()
+        }
+    if isinstance(obj, bytes):
+        return obj
+    if isinstance(obj, unicode):
+        return obj.encode(encoding)
+    if isinstance(obj, Iterable):
+        return [encode(i, encoding) for i in obj]
+    return obj