Bug 1384570 - Part 4 - Support Whitespace yielded by the compare-locales Parser. r?Pike draft
authorStaś Małolepszy <stas@mozilla.com>
Fri, 22 Sep 2017 11:42:04 +0200
changeset 11702 7ea4a5d552933fb2de80c60fcc95288c7c7cbb70
parent 11701 78675018687946b01eabf8dc9832c31a40ec7442
push id1794
push usersmalolepszy@mozilla.com
push dateFri, 22 Sep 2017 17:41:45 +0000
reviewersPike
bugs1384570
Bug 1384570 - Part 4 - Support Whitespace yielded by the compare-locales Parser. r?Pike MozReview-Commit-ID: KNoJ1SDO5MC
cross-channel-l10n/mozxchannel/merge.py
cross-channel-l10n/tests/test-merge-comments.py
cross-channel-l10n/tests/test-merge-dtd.py
--- a/cross-channel-l10n/mozxchannel/merge.py
+++ b/cross-channel-l10n/mozxchannel/merge.py
@@ -1,12 +1,12 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from codecs import encode
 
 
 from compare_locales import parser as cl
 from compare_locales.compare import AddRemove
 
 
 def merge_channels(name, *resources):
@@ -14,25 +14,33 @@ def merge_channels(name, *resources):
 
     if isinstance(parser, cl.FluentParser):
         raise Exception("Fluent files (.ftl) are not supported (bug 1399055).")
 
     # A map of comments to the keys of entities they belong to.
     comments = {}
 
     def parse_resource(resource):
+        # The counter dict keeps track of number of identical comments.
+        counter = defaultdict(int)
         parser.readContents(resource)
-        pairs = [get_key_value(entity) for entity in parser.walk()]
+        pairs = [get_key_value(entity, counter) for entity in parser.walk()]
         return OrderedDict(pairs)
 
-    def get_key_value(entity):
+    def get_key_value(entity, counter):
         if isinstance(entity, cl.Comment):
-            # Comments don't have keys.  We always treat them as unique and try
-            # to match prune duplicates by looking at the entities following
-            # them.
+            counter[entity.all] += 1
+            # Use the (value, index) tuple as the key. AddRemove will
+            # de-deplicate identical comments at the same index.
+            return ((entity.all, counter[entity.all]), entity)
+
+        if isinstance(entity, cl.Whitespace):
+            # Use the Whitespace instance as the key so that it's always
+            # unique. Adjecent whitespace will be folded into the longer one in
+            # prune.
             return (entity, entity)
 
         # When comments change, AddRemove gives us one 'add' and one 'delete'
         # (because a comment's key is its content).  In merge_two we'll try to
         # de-duplicate comments by looking at the entity they belong to.  Set
         # up the back-reference from the comment to its entity here.
         if isinstance(entity, cl.Entity) and entity.pre_comment:
             comments[entity.pre_comment] = entity.key
@@ -55,35 +63,46 @@ def merge_two(comments, newer, older):
         entity = newer.get(key, None)
 
         # Always prefer the newer version.
         if entity is not None:
             return entity
 
         entity = older.get(key)
 
-        # If it's a comment attached to an entity, try to find that entity in
-        # newer and use its comment instead.
+        # If it's an old comment attached to an entity, try to find that
+        # entity in newer and return None to use its comment instead in prune.
         if isinstance(entity, cl.Comment) and entity in comments:
             next_entity = newer.get(comments[entity], None)
             if next_entity is not None and next_entity.pre_comment:
                 # We'll prune this before returning the merged result.
                 return None
 
         return entity
 
     contents = [(key, get_entity(key)) for _, key in diff]
-    # Prune Nones which stand for duplicated comments.
-    pruned = [(k, v) for k, v in contents if v is not None]
+
+    def prune(acc, cur):
+        _, entity = cur
+        if entity is None:
+            # Prune Nones which stand for duplicated comments.
+            return acc
+
+        if len(acc) and isinstance(entity, cl.Whitespace):
+            _, prev_entity = acc[-1]
+
+            if isinstance(prev_entity, cl.Whitespace):
+                # Prefer the longer whitespace.
+                if len(prev_entity.all) > len(entity.all):
+                    acc[-1] = (prev_entity, prev_entity)
+                else:
+                    acc[-1] = (entity, entity)
+                return acc
+
+        acc.append(cur)
+        return acc
+
+    pruned = reduce(prune, contents, [])
     return OrderedDict(pruned)
 
 
 def serialize_legacy_resource(entities):
-    return reduce(serialize_legacy_entity, entities.values(), "")
-
-
-def serialize_legacy_entity(acc, entity):
-    # Ensure there's a newline at the end of the resource content so far before
-    # we append the current entity's content.
-    if acc and not acc.endswith("\n"):
-        acc += "\n"
-
-    return acc + entity.all
+    return "".join((entity.all for entity in entities.values()))
--- a/cross-channel-l10n/tests/test-merge-comments.py
+++ b/cross-channel-l10n/tests/test-merge-comments.py
@@ -12,50 +12,50 @@ class TestMergeComments(unittest.TestCas
         channels = ("""
 foo = Foo 1
 # Bar Comment 1
 bar = Bar 1
 """, """
 foo = Foo 2
 bar = Bar 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 foo = Foo 1
 # Bar Comment 1
 bar = Bar 1
 """)
 
     def test_comment_still_in_last(self):
         channels = ("""
 foo = Foo 1
 bar = Bar 1
 """, """
 foo = Foo 2
 # Bar Comment 2
 bar = Bar 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 foo = Foo 1
 # Bar Comment 2
 bar = Bar 1
 """)
 
     def test_comment_changed(self):
         channels = ("""
 foo = Foo 1
 # Bar Comment 1
 bar = Bar 1
 """, """
 foo = Foo 2
 # Bar Comment 2
 bar = Bar 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 foo = Foo 1
 # Bar Comment 1
 bar = Bar 1
 """)
 
 
 class TestMergeStandaloneComments(unittest.TestCase):
@@ -66,34 +66,35 @@ class TestMergeStandaloneComments(unitte
 # Standalone Comment 1
 
 # Foo Comment 1
 foo = Foo 1
 """, """
 # Foo Comment 2
 foo = Foo 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 # Standalone Comment 1
+
 # Foo Comment 1
 foo = Foo 1
 """)
 
     def test_comment_still_in_last(self):
         channels = ("""
 # Foo Comment 1
 foo = Foo 1
 """, """
 # Standalone Comment 2
 
 # Foo Comment 2
 foo = Foo 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 # Standalone Comment 2
 
 # Foo Comment 1
 foo = Foo 1
 """)
 
     def test_comments_in_both(self):
@@ -103,37 +104,82 @@ foo = Foo 1
 # Foo Comment 1
 foo = Foo 1
 """, """
 # Standalone Comment 2
 
 # Foo Comment 2
 foo = Foo 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 # Standalone Comment 2
 
 # Standalone Comment 1
+
 # Foo Comment 1
 foo = Foo 1
 """)
 
     def test_identical_comments_in_both(self):
         channels = ("""
 # Standalone Comment
 
 # Foo Comment 1
 foo = Foo 1
 """, """
 # Standalone Comment
 
 # Foo Comment 2
 foo = Foo 2
 """)
-        self.assertEqual(
+        self.assertMultiLineEqual(
             merge_channels(self.name, *channels), """
 # Standalone Comment
 
-# Standalone Comment
 # Foo Comment 1
 foo = Foo 1
 """)
+
+    def test_standalone_which_is_attached_in_first(self):
+        channels = ("""
+# Ambiguous Comment
+foo = Foo 1
+
+# Bar Comment 1
+bar = Bar 1
+""", """
+# Ambiguous Comment
+
+# Bar Comment 2
+bar = Bar 2
+""")
+        self.assertMultiLineEqual(
+            merge_channels(self.name, *channels), """
+# Ambiguous Comment
+
+foo = Foo 1
+
+# Bar Comment 1
+bar = Bar 1
+""")
+
+    def test_standalone_which_is_attached_in_second(self):
+        channels = ("""
+# Ambiguous Comment
+
+# Bar Comment 1
+bar = Bar 1
+""", """
+# Ambiguous Comment
+foo = Foo 1
+
+# Bar Comment 2
+bar = Bar 2
+""")
+        self.assertMultiLineEqual(
+            merge_channels(self.name, *channels), """
+# Ambiguous Comment
+foo = Foo 1
+
+# Bar Comment 1
+bar = Bar 1
+""")
--- a/cross-channel-l10n/tests/test-merge-dtd.py
+++ b/cross-channel-l10n/tests/test-merge-dtd.py
@@ -2,19 +2,122 @@
 from __future__ import unicode_literals
 
 import unittest
 from mozxchannel.merge import merge_channels
 
 
 class TestMergeDTD(unittest.TestCase):
     name = "foo.dtd"
+    maxDiff = None
 
     def test_no_changes(self):
         channels = ("""
 <!ENTITY foo "Foo 1">
 """, """
 <!ENTITY foo "Foo 2">
 """)
         self.assertEqual(
             merge_channels(self.name, *channels), """
 <!ENTITY foo "Foo 1">
 """)
+
+    def test_browser_dtd(self):
+        channels = ("""\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!-- LOCALIZATION NOTE : FILE This file contains the browser main menu items -->
+<!-- LOCALIZATION NOTE : FILE Do not translate commandkeys -->
+
+<!-- LOCALIZATION NOTE (mainWindow.titlemodifier) : DONT_TRANSLATE -->
+<!ENTITY mainWindow.titlemodifier "&brandFullName;">
+<!-- LOCALIZATION NOTE (mainWindow.titlemodifiermenuseparator): DONT_TRANSLATE -->
+<!ENTITY mainWindow.titlemodifiermenuseparator " - ">
+<!-- LOCALIZATION NOTE (mainWindow.titlePrivateBrowsingSuffix2): This will be appended to the window's title
+                                                                inside the private browsing mode -->
+<!ENTITY mainWindow.titlePrivateBrowsingSuffix2 "(Private Browsing)">
+""", """\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!-- LOCALIZATION NOTE : FILE This file contains the browser main menu items -->
+<!-- LOCALIZATION NOTE : FILE Do not translate commandkeys -->
+
+<!-- LOCALIZATION NOTE (mainWindow.title): DONT_TRANSLATE -->
+<!ENTITY mainWindow.title "&brandFullName;">
+<!-- LOCALIZATION NOTE (mainWindow.titlemodifier) : DONT_TRANSLATE -->
+<!ENTITY mainWindow.titlemodifier "&brandFullName;">
+<!-- LOCALIZATION NOTE (mainWindow.titlePrivateBrowsingSuffix): This will be appended to the window's title
+                                                                inside the private browsing mode -->
+<!ENTITY mainWindow.titlePrivateBrowsingSuffix "(Private Browsing)">
+""")
+
+        self.assertMultiLineEqual(
+            merge_channels(self.name, *channels), """\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!-- LOCALIZATION NOTE : FILE This file contains the browser main menu items -->
+<!-- LOCALIZATION NOTE : FILE Do not translate commandkeys -->
+
+<!-- LOCALIZATION NOTE (mainWindow.title): DONT_TRANSLATE -->
+<!ENTITY mainWindow.title "&brandFullName;">
+
+<!-- LOCALIZATION NOTE (mainWindow.titlemodifier) : DONT_TRANSLATE -->
+<!ENTITY mainWindow.titlemodifier "&brandFullName;">
+<!-- LOCALIZATION NOTE (mainWindow.titlePrivateBrowsingSuffix): This will be appended to the window's title
+                                                                inside the private browsing mode -->
+<!ENTITY mainWindow.titlePrivateBrowsingSuffix "(Private Browsing)">
+<!-- LOCALIZATION NOTE (mainWindow.titlemodifiermenuseparator): DONT_TRANSLATE -->
+<!ENTITY mainWindow.titlemodifiermenuseparator " - ">
+<!-- LOCALIZATION NOTE (mainWindow.titlePrivateBrowsingSuffix2): This will be appended to the window's title
+                                                                inside the private browsing mode -->
+<!ENTITY mainWindow.titlePrivateBrowsingSuffix2 "(Private Browsing)">
+""")
+
+    def test_aboutServiceWorkers_dtd(self):
+        channels = ("""\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.title                     "About Service Workers">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.maintitle                 "Registered Service Workers">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.warning_not_enabled       "Service Workers are not enabled.">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.warning_no_serviceworkers "No Service Workers registered.">
+""", """\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.title                     "About Service Workers">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.maintitle                 "Registered Service Workers">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.warning_not_enabled       "Service Workers are not enabled.">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.warning_no_serviceworkers "No Service Workers registered.">
+""")
+
+        self.assertMultiLineEqual(
+            merge_channels(self.name, *channels), """\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+   - License, v. 2.0. If a copy of the MPL was not distributed with this
+   - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.title                     "About Service Workers">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.maintitle                 "Registered Service Workers">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.warning_not_enabled       "Service Workers are not enabled.">
+<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. -->
+<!ENTITY aboutServiceWorkers.warning_no_serviceworkers "No Service Workers registered.">
+""")