Bug 1296503 - Switch config.status to unicode literals. r?ted draft
author Mike Hommey <mh+mozilla@glandium.org>
Thu, 18 Aug 2016 18:27:39 +0900
changeset 404297 2131623920e2aa38a8591696b881afe29aa0adaf
parent 404296 82c715bc29fc61dfe9a10ca97420301e1e95a697
child 404298 466b01f40988f86b84fa3d736e44e234d6fb12ea
push id 27158
push user bmo:mh+mozilla@glandium.org
push date Tue, 23 Aug 2016 05:25:26 +0000
reviewers ted
bugs 1296503
milestone 51.0a1
Bug 1296503 - Switch config.status to unicode literals. r?ted

Ironically, the first thing we do with those unicode literals is convert them to byte strings because the build backends don't like them yet.
configure.py
python/mozbuild/mozbuild/util.py
--- a/configure.py
+++ b/configure.py
@@ -1,26 +1,28 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import print_function, unicode_literals
 
 import codecs
-import json
 import os
 import subprocess
 import sys
-
-from collections import Iterable
+import textwrap
 
 
 base_dir = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, os.path.join(base_dir, 'python', 'mozbuild'))
 from mozbuild.configure import ConfigureSandbox
+from mozbuild.util import (
+    indented_repr,
+    encode,
+)
 
 
 def main(argv):
     config = {}
     sandbox = ConfigureSandbox(config, os.environ, argv)
     sandbox.run(os.path.join(os.path.dirname(__file__), 'moz.configure'))
 
     if sandbox._help:
@@ -57,62 +59,47 @@ def config_status(config):
     # Create config.status. Eventually, we'll want to just do the work it does
     # here, when we're able to skip configure tests/use cached results/not rely
     # on autoconf.
     print("Creating config.status", file=sys.stderr)
     encoding = 'mbcs' if sys.platform == 'win32' else 'utf-8'
     with codecs.open('config.status', 'w', encoding) as fh:
         fh.write('#!%s\n' % config['PYTHON'])
         fh.write('# coding=%s\n' % encoding)
-        # Because we're serializing as JSON but reading as python, the values
-        # for True, False and None are true, false and null, which don't exist.
-        # Define them.
-        fh.write('true, false, null = True, False, None\n')
+        fh.write('from __future__ import unicode_literals\n')
+        fh.write('from mozbuild.util import encode\n')
+        # A lot of the build backend code is currently expecting byte
+        # strings and breaks in subtle ways with unicode strings.
+        fh.write("encoding = '%s'\n" % encoding)
         for k, v in sanitized_config.iteritems():
-            fh.write('%s = ' % k)
-            json.dump(v, fh, sort_keys=True, indent=4, ensure_ascii=False)
-            fh.write('\n')
+            fh.write('%s = encode(%s, encoding)\n' % (k, indented_repr(v)))
         fh.write("__all__ = ['topobjdir', 'topsrcdir', 'defines', "
                  "'non_global_defines', 'substs', 'mozconfig']")
 
         if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
-            fh.write('''
-if __name__ == '__main__':
-    args = dict([(name, globals()[name]) for name in __all__])
-    from mozbuild.config_status import config_status
-    config_status(**args)
-''')
-
-    # Running config.status standalone uses byte literals for all the config,
-    # instead of the unicode literals we have in sanitized_config right now.
-    # Some values in sanitized_config also have more complex types, such as
-    # EnumString, which using when calling config_status would currently break
-    # the build, as well as making it inconsistent with re-running
-    # config.status. Fortunately, EnumString derives from unicode, so it's
-    # covered by converting unicode strings.
-    # Moreover, a lot of the build backend code is currently expecting byte
-    # strings and breaks in subtle ways with unicode strings.
-    def encode(v):
-        if isinstance(v, dict):
-            return {
-                encode(k): encode(val)
-                for k, val in v.iteritems()
-            }
-        if isinstance(v, str):
-            return v
-        if isinstance(v, unicode):
-            return v.encode(encoding)
-        if isinstance(v, Iterable):
-            return [encode(i) for i in v]
-        return v
+            fh.write(textwrap.dedent('''
+                if __name__ == '__main__':
+                    from mozbuild.config_status import config_status
+                    args = dict([(name, globals()[name]) for name in __all__])
+                    config_status(**args)
+            '''))
 
     # Other things than us are going to run this file, so we need to give it
     # executable permissions.
     os.chmod('config.status', 0o755)
     if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
         os.environ[b'WRITE_MOZINFO'] = b'1'
         from mozbuild.config_status import config_status
-        return config_status(args=[], **encode(sanitized_config))
+
+        # Some values in sanitized_config also have more complex types, such as
+        # EnumString, which using when calling config_status would currently
+        # break the build, as well as making it inconsistent with re-running
+        # config.status. Fortunately, EnumString derives from unicode, so it's
+        # covered by converting unicode strings.
+
+        # A lot of the build backend code is currently expecting byte strings
+        # and breaks in subtle ways with unicode strings.
+        return config_status(args=[], **encode(sanitized_config, encoding))
     return 0
 
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv))
--- a/python/mozbuild/mozbuild/util.py
+++ b/python/mozbuild/mozbuild/util.py
@@ -19,16 +19,17 @@ import os
 import re
 import stat
 import sys
 import time
 import types
 
 from collections import (
     defaultdict,
+    Iterable,
     OrderedDict,
 )
 from io import (
     StringIO,
     BytesIO,
 )
 
 
@@ -1239,8 +1240,24 @@ def indented_repr(o, indent=4):
                 for d in recurse_indented_repr(i, level + 1):
                     yield d
                 yield ',\n'
             yield one_indent * level
             yield ']'
         else:
             yield repr(o)
     return ''.join(recurse_indented_repr(o, 0))
+
+
+def encode(obj, encoding='utf-8'):
+    '''Recursively encode unicode strings with the given encoding.'''
+    if isinstance(obj, dict):
+        return {
+            encode(k, encoding): encode(v, encoding)
+            for k, v in obj.iteritems()
+        }
+    if isinstance(obj, bytes):
+        return obj
+    if isinstance(obj, unicode):
+        return obj.encode(encoding)
+    if isinstance(obj, Iterable):
+        return [encode(i, encoding) for i in obj]
+    return obj