Bug 1384570 - Part 2 - De-duplicate comments. r?Pike draft
authorStaś Małolepszy <stas@mozilla.com>
Tue, 05 Sep 2017 15:13:34 +0200
changeset 11700 1be153f559045caa70eb1e5d4b856cb9f0624683
parent 11699 bceee34aacb6ca188cf2ea661dc5285e0c620858
child 11701 78675018687946b01eabf8dc9832c31a40ec7442
push id1794
push usersmalolepszy@mozilla.com
push dateFri, 22 Sep 2017 17:41:45 +0000
reviewersPike
bugs1384570
Bug 1384570 - Part 2 - De-duplicate comments. r?Pike MozReview-Commit-ID: FYghdZh06tJ
cross-channel-l10n/mozxchannel/merge.py
cross-channel-l10n/tests/test-merge-comments.py
--- a/cross-channel-l10n/mozxchannel/merge.py
+++ b/cross-channel-l10n/mozxchannel/merge.py
@@ -8,41 +8,75 @@ from compare_locales.compare import AddR
 
 
 def merge_channels(name, *resources):
     parser = cl.getParser(name)
 
     if isinstance(parser, cl.FluentParser):
         raise Exception("Fluent files (.ftl) are not supported (bug 1399055).")
 
+    # A map of comments to the keys of entities they belong to.
+    comments = {}
+
     def parse_resource(resource):
         parser.readContents(resource)
-        pairs = [(get_key(entity), entity) for entity in parser.walk()]
+        pairs = [get_key_value(entity) for entity in parser.walk()]
         return OrderedDict(pairs)
 
-    def get_key(entity):
+    def get_key_value(entity):
         if isinstance(entity, cl.Comment):
-            return entity.all
-        return entity.key
+            # Comments don't have keys.  We always treat them as unique and try
+            # to match prune duplicates by looking at the entities following
+            # them.
+            return (entity, entity)
 
-    entities = reduce(merge_two, map(parse_resource, resources))
+        # When comments change, AddRemove gives us one 'add' and one 'delete'
+        # (because a comment's key is its content).  In merge_two we'll try to
+        # de-duplicate comments by looking at the entity they belong to.  Set
+        # up the back-reference from the comment to its entity here.
+        if isinstance(entity, cl.Entity) and entity.pre_comment:
+            comments[entity.pre_comment] = entity.key
+
+        return (entity.key, entity)
+
+    entities = reduce(
+        lambda x, y: merge_two(comments, x, y),
+        map(parse_resource, resources))
 
     return serialize_legacy_resource(entities)
 
 
-def merge_two(newer, older):
+def merge_two(comments, newer, older):
     diff = AddRemove()
     diff.set_left(newer.keys())
     diff.set_right(older.keys())
 
     def get_entity(key):
-        return newer.get(key, older.get(key))
+        entity = newer.get(key, None)
+
+        # Always prefer the newer version.
+        if entity is not None:
+            return entity
+
+        entity = older.get(key)
+
+        # If it's a comment attached to an entity, try to find that entity in
+        # newer and use its comment instead.
+        if isinstance(entity, cl.Comment) and entity in comments:
+            next_entity = newer.get(comments[entity], None)
+            if next_entity is not None and next_entity.pre_comment:
+                # We'll prune this before returning the merged result.
+                return None
+
+        return entity
 
     contents = [(key, get_entity(key)) for _, key in diff]
-    return OrderedDict(contents)
+    # Prune Nones which stand for duplicated comments.
+    pruned = [(k, v) for k, v in contents if v is not None]
+    return OrderedDict(pruned)
 
 
 def serialize_legacy_resource(entities):
     return reduce(serialize_legacy_entity, entities.values(), "")
 
 
 def serialize_legacy_entity(acc, entity):
     # Ensure there's a newline at the end of the resource content so far before
--- a/cross-channel-l10n/tests/test-merge-comments.py
+++ b/cross-channel-l10n/tests/test-merge-comments.py
@@ -48,12 +48,92 @@ bar = Bar 1
 """, """
 foo = Foo 2
 # Bar Comment 2
 bar = Bar 2
 """)
         self.assertEqual(
             merge_channels(self.name, *channels), """
 foo = Foo 1
-# Bar Comment 2
 # Bar Comment 1
 bar = Bar 1
 """)
+
+
+class TestMergeStandaloneComments(unittest.TestCase):
+    name = "foo.properties"
+
+    def test_comment_added_in_first(self):
+        channels = ("""
+# Standalone Comment 1
+
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Foo Comment 2
+foo = Foo 2
+""")
+        self.assertEqual(
+            merge_channels(self.name, *channels), """
+# Standalone Comment 1
+# Foo Comment 1
+foo = Foo 1
+""")
+
+    def test_comment_still_in_last(self):
+        channels = ("""
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Standalone Comment 2
+
+# Foo Comment 2
+foo = Foo 2
+""")
+        self.assertEqual(
+            merge_channels(self.name, *channels), """
+# Standalone Comment 2
+
+# Foo Comment 1
+foo = Foo 1
+""")
+
+    def test_comments_in_both(self):
+        channels = ("""
+# Standalone Comment 1
+
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Standalone Comment 2
+
+# Foo Comment 2
+foo = Foo 2
+""")
+        self.assertEqual(
+            merge_channels(self.name, *channels), """
+# Standalone Comment 2
+
+# Standalone Comment 1
+# Foo Comment 1
+foo = Foo 1
+""")
+
+    def test_identical_comments_in_both(self):
+        channels = ("""
+# Standalone Comment
+
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Standalone Comment
+
+# Foo Comment 2
+foo = Foo 2
+""")
+        self.assertEqual(
+            merge_channels(self.name, *channels), """
+# Standalone Comment
+
+# Standalone Comment
+# Foo Comment 1
+foo = Foo 1
+""")