Bug 1384570 - Part 3 - Encode output into byte strings using utf8. r?Pike
MozReview-Commit-ID: 6W1A2JkyGI3
--- a/cross-channel-l10n/mozxchannel/merge.py
+++ b/cross-channel-l10n/mozxchannel/merge.py
@@ -1,12 +1,14 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from collections import OrderedDict
+from codecs import encode
+
from compare_locales import parser as cl
from compare_locales.compare import AddRemove
def merge_channels(name, *resources):
parser = cl.getParser(name)
@@ -36,17 +38,17 @@ def merge_channels(name, *resources):
comments[entity.pre_comment] = entity.key
return (entity.key, entity)
entities = reduce(
lambda x, y: merge_two(comments, x, y),
map(parse_resource, resources))
- return serialize_legacy_resource(entities)
+ return encode(serialize_legacy_resource(entities), 'utf8')
def merge_two(comments, newer, older):
diff = AddRemove()
diff.set_left(newer.keys())
diff.set_right(older.keys())
def get_entity(key):
--- a/cross-channel-l10n/tests/test-merge-properties.py
+++ b/cross-channel-l10n/tests/test-merge-properties.py
@@ -1,20 +1,39 @@
# coding=utf8
from __future__ import unicode_literals
+import codecs
import unittest
+
from mozxchannel.merge import merge_channels
+from .util import encode
class TestMergeProperties(unittest.TestCase):
name = "foo.properties"
def test_no_changes(self):
channels = ("""
foo = Foo 1
""", """
foo = Foo 2
""")
self.assertEqual(
merge_channels(self.name, *channels), """
foo = Foo 1
""")
+
+ def test_encoding(self):
+ channels = encode("""
+foo = Foo 1…
+""", """
+foo = Foo 2…
+""")
+ output = merge_channels(self.name, *channels)
+ self.assertEqual(output, encode("""
+foo = Foo 1…
+"""))
+
+ u_output = codecs.decode(output, "utf8")
+ self.assertEqual(u_output, """
+foo = Foo 1…
+""")
new file mode 100644
--- /dev/null
+++ b/cross-channel-l10n/tests/util.py
@@ -0,0 +1,13 @@
+import codecs
+
+
+def encode(*resources):
+ """Encode a variable number of Unicode strings.
+
+ The compare-locales Parser expects byte-encoded contents which it decodes
+ using the Parser-specific encoding.
+ """
+ if len(resources) == 1:
+ return codecs.encode(resources[0], "utf8")
+
+ return [codecs.encode(res, "utf8") for res in resources]