Bug 1384570 - Part 2 - De-duplicate comments. r?Pike
MozReview-Commit-ID: FYghdZh06tJ
--- a/cross-channel-l10n/mozxchannel/merge.py
+++ b/cross-channel-l10n/mozxchannel/merge.py
@@ -8,41 +8,75 @@ from compare_locales.compare import AddR
def merge_channels(name, *resources):
parser = cl.getParser(name)
if isinstance(parser, cl.FluentParser):
raise Exception("Fluent files (.ftl) are not supported (bug 1399055).")
+ # A map of comments to the keys of entities they belong to.
+ comments = {}
+
def parse_resource(resource):
parser.readContents(resource)
- pairs = [(get_key(entity), entity) for entity in parser.walk()]
+ pairs = [get_key_value(entity) for entity in parser.walk()]
return OrderedDict(pairs)
- def get_key(entity):
+ def get_key_value(entity):
if isinstance(entity, cl.Comment):
- return entity.all
- return entity.key
+ # Comments don't have keys. We always treat them as unique and try
+ # to match prune duplicates by looking at the entities following
+ # them.
+ return (entity, entity)
- entities = reduce(merge_two, map(parse_resource, resources))
+ # When comments change, AddRemove gives us one 'add' and one 'delete'
+ # (because a comment's key is its content). In merge_two we'll try to
+ # de-duplicate comments by looking at the entity they belong to. Set
+ # up the back-reference from the comment to its entity here.
+ if isinstance(entity, cl.Entity) and entity.pre_comment:
+ comments[entity.pre_comment] = entity.key
+
+ return (entity.key, entity)
+
+ entities = reduce(
+ lambda x, y: merge_two(comments, x, y),
+ map(parse_resource, resources))
return serialize_legacy_resource(entities)
-def merge_two(newer, older):
+def merge_two(comments, newer, older):
diff = AddRemove()
diff.set_left(newer.keys())
diff.set_right(older.keys())
def get_entity(key):
- return newer.get(key, older.get(key))
+ entity = newer.get(key, None)
+
+ # Always prefer the newer version.
+ if entity is not None:
+ return entity
+
+ entity = older.get(key)
+
+ # If it's a comment attached to an entity, try to find that entity in
+ # newer and use its comment instead.
+ if isinstance(entity, cl.Comment) and entity in comments:
+ next_entity = newer.get(comments[entity], None)
+ if next_entity is not None and next_entity.pre_comment:
+ # We'll prune this before returning the merged result.
+ return None
+
+ return entity
contents = [(key, get_entity(key)) for _, key in diff]
- return OrderedDict(contents)
+ # Prune Nones which stand for duplicated comments.
+ pruned = [(k, v) for k, v in contents if v is not None]
+ return OrderedDict(pruned)
def serialize_legacy_resource(entities):
return reduce(serialize_legacy_entity, entities.values(), "")
def serialize_legacy_entity(acc, entity):
# Ensure there's a newline at the end of the resource content so far before
--- a/cross-channel-l10n/tests/test-merge-comments.py
+++ b/cross-channel-l10n/tests/test-merge-comments.py
@@ -48,12 +48,92 @@ bar = Bar 1
""", """
foo = Foo 2
# Bar Comment 2
bar = Bar 2
""")
self.assertEqual(
merge_channels(self.name, *channels), """
foo = Foo 1
-# Bar Comment 2
# Bar Comment 1
bar = Bar 1
""")
+
+
+class TestMergeStandaloneComments(unittest.TestCase):
+ name = "foo.properties"
+
+ def test_comment_added_in_first(self):
+ channels = ("""
+# Standalone Comment 1
+
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Foo Comment 2
+foo = Foo 2
+""")
+ self.assertEqual(
+ merge_channels(self.name, *channels), """
+# Standalone Comment 1
+# Foo Comment 1
+foo = Foo 1
+""")
+
+ def test_comment_still_in_last(self):
+ channels = ("""
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Standalone Comment 2
+
+# Foo Comment 2
+foo = Foo 2
+""")
+ self.assertEqual(
+ merge_channels(self.name, *channels), """
+# Standalone Comment 2
+
+# Foo Comment 1
+foo = Foo 1
+""")
+
+ def test_comments_in_both(self):
+ channels = ("""
+# Standalone Comment 1
+
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Standalone Comment 2
+
+# Foo Comment 2
+foo = Foo 2
+""")
+ self.assertEqual(
+ merge_channels(self.name, *channels), """
+# Standalone Comment 2
+
+# Standalone Comment 1
+# Foo Comment 1
+foo = Foo 1
+""")
+
+ def test_identical_comments_in_both(self):
+ channels = ("""
+# Standalone Comment
+
+# Foo Comment 1
+foo = Foo 1
+""", """
+# Standalone Comment
+
+# Foo Comment 2
+foo = Foo 2
+""")
+ self.assertEqual(
+ merge_channels(self.name, *channels), """
+# Standalone Comment
+
+# Standalone Comment
+# Foo Comment 1
+foo = Foo 1
+""")