Bug 1384570 - Part 3 - Encode output into byte strings using utf8. r?Pike draft
author: Staś Małolepszy <stas@mozilla.com>
Tue, 12 Sep 2017 17:30:06 +0200
changeset 11701 78675018687946b01eabf8dc9832c31a40ec7442
parent 11700 1be153f559045caa70eb1e5d4b856cb9f0624683
child 11702 7ea4a5d552933fb2de80c60fcc95288c7c7cbb70
push id: 1794
push user: smalolepszy@mozilla.com
push date: Fri, 22 Sep 2017 17:41:45 +0000
reviewers: Pike
bugs: 1384570
Bug 1384570 - Part 3 - Encode output into byte strings using utf8. r?Pike MozReview-Commit-ID: 6W1A2JkyGI3
cross-channel-l10n/mozxchannel/merge.py
cross-channel-l10n/tests/__init__.py
cross-channel-l10n/tests/test-merge-properties.py
cross-channel-l10n/tests/util.py
--- a/cross-channel-l10n/mozxchannel/merge.py
+++ b/cross-channel-l10n/mozxchannel/merge.py
@@ -1,12 +1,14 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from collections import OrderedDict
+from codecs import encode
+
 
 from compare_locales import parser as cl
 from compare_locales.compare import AddRemove
 
 
 def merge_channels(name, *resources):
     parser = cl.getParser(name)
 
@@ -36,17 +38,17 @@ def merge_channels(name, *resources):
             comments[entity.pre_comment] = entity.key
 
         return (entity.key, entity)
 
     entities = reduce(
         lambda x, y: merge_two(comments, x, y),
         map(parse_resource, resources))
 
-    return serialize_legacy_resource(entities)
+    return encode(serialize_legacy_resource(entities), 'utf8')
 
 
 def merge_two(comments, newer, older):
     diff = AddRemove()
     diff.set_left(newer.keys())
     diff.set_right(older.keys())
 
     def get_entity(key):
new file mode 100644
--- a/cross-channel-l10n/tests/test-merge-properties.py
+++ b/cross-channel-l10n/tests/test-merge-properties.py
@@ -1,20 +1,39 @@
 # coding=utf8
 from __future__ import unicode_literals
 
+import codecs
 import unittest
+
 from mozxchannel.merge import merge_channels
+from .util import encode
 
 
 class TestMergeProperties(unittest.TestCase):
     name = "foo.properties"
 
     def test_no_changes(self):
         channels = ("""
 foo = Foo 1
 """, """
 foo = Foo 2
 """)
         self.assertEqual(
             merge_channels(self.name, *channels), """
 foo = Foo 1
 """)
+
+    def test_encoding(self):
+        channels = encode("""
+foo = Foo 1…
+""", """
+foo = Foo 2…
+""")
+        output = merge_channels(self.name, *channels)
+        self.assertEqual(output, encode("""
+foo = Foo 1…
+"""))
+
+        u_output = codecs.decode(output, "utf8")
+        self.assertEqual(u_output, """
+foo = Foo 1…
+""")
new file mode 100644
--- /dev/null
+++ b/cross-channel-l10n/tests/util.py
@@ -0,0 +1,13 @@
+import codecs
+
+
+def encode(*resources):
+    """Encode a variable number of Unicode strings.
+
+    The compare-locales Parser expects byte-encoded contents which it decodes
+    using the Parser-specific encoding.
+    """
+    if len(resources) == 1:
+        return codecs.encode(resources[0], "utf8")
+
+    return [codecs.encode(res, "utf8") for res in resources]