bug 1371601, check for duplicate entities, and duplicate attributes in ftl, r=stas
author Axel Hecht <axel@pike.org>
Fri, 09 Jun 2017 16:19:35 +0200
changeset 282 0dcd32f2781955a4b8f63e680af8de4d901497cc
parent 281 a5d2a6476e667c61fbb8c180df14b2f75f9ef2b1
child 283 2312a685f7602175cf08ca9d7647ed18056565c0
child 285 dd3d1f7841ab309a39e2051e2870f2da393f6b2e
push id 79
push user axel@mozilla.com
push date Wed, 28 Jun 2017 15:41:46 +0000
reviewers stas
bugs 1371601
bug 1371601, check for duplicate entities, and duplicate attributes in ftl, r=stas For Entities, en-US duplicates are warnings, and l10n duplicates are errors. For ftl attributes, ignore en-US duplicates altogether, and just warn on duplicate attributes on l10n. That way, we don't end up skipping the translated message. MozReview-Commit-ID: 5fgkOtht43L
compare_locales/checks.py
compare_locales/compare.py
compare_locales/parser.py
compare_locales/tests/test_merge.py
--- a/compare_locales/checks.py
+++ b/compare_locales/checks.py
@@ -1,13 +1,14 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import re
+from collections import Counter
 from difflib import SequenceMatcher
 from xml import sax
 try:
     from cStringIO import StringIO
 except ImportError:
     from StringIO import StringIO
 
 from compare_locales.parser import DTDParser, PropertiesEntity
@@ -440,19 +441,31 @@ class FluentChecker(Checker):
         if ref_entry.value is None and l10n_entry.value is not None:
             yield ('error', l10n_entry.value.span.start,
                    'Obsolete value', 'fluent')
 
         # verify that we're having the same set of attributes
         ref_attr_names = set((attr.id.name for attr in ref_entry.attributes))
         ref_pos = dict((attr.id.name, i)
                        for i, attr in enumerate(ref_entry.attributes))
-        l10n_attr_names = set((attr.id.name for attr in l10n_entry.attributes))
+        l10n_attr_counts = \
+            Counter(attr.id.name for attr in l10n_entry.attributes)
+        l10n_attr_names = set(l10n_attr_counts)
         l10n_pos = dict((attr.id.name, i)
                         for i, attr in enumerate(l10n_entry.attributes))
+        # check for duplicate Attributes
+        # only warn to not trigger a merge skip
+        for attr_name, cnt in l10n_attr_counts.items():
+            if cnt > 1:
+                yield (
+                    'warning',
+                    l10n_entry.attributes[l10n_pos[attr_name]].span.start,
+                    'Attribute "{}" occurs {} times'.format(
+                        attr_name, cnt),
+                    'fluent')
 
         missing_attr_names = sorted(ref_attr_names - l10n_attr_names,
                                     key=lambda k: ref_pos[k])
         for attr_name in missing_attr_names:
             yield ('error', l10n_entry.span.start,
                    'Missing attribute: ' + attr_name, 'fluent')
 
         obsolete_attr_names = sorted(l10n_attr_names - ref_attr_names,
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -469,16 +469,20 @@ class ContentComparer:
         ar.set_left(ref_list)
         ar.set_right(l10n_list)
         report = missing = obsolete = changed = unchanged = keys = 0
         missing_w = changed_w = unchanged_w = 0  # word stats
         missings = []
         skips = []
         checker = getChecker(l10n,
                              reference=ref_entities, extra_tests=extra_tests)
+        for msg in p.findDuplicates(ref_entities):
+            self.notify('warning', l10n, msg)
+        for msg in p.findDuplicates(l10n_entities):
+            self.notify('error', l10n, msg)
         for action, entity_id in ar:
             if action == 'delete':
                 # missing entity
                 if isinstance(ref_entities[ref_map[entity_id]], parser.Junk):
                     self.notify('warning', l10n, 'Parser error in en-US')
                     continue
                 _rv = self.notify('missingEntity', l10n, entity_id)
                 if _rv == "ignore":
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -1,15 +1,16 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import re
 import bisect
 import codecs
+from collections import Counter
 import logging
 
 from fluent.syntax import FluentParser as FTLParser
 from fluent.syntax import ast as ftl
 
 __constructors = []
 
 
@@ -305,16 +306,23 @@ class Parser(object):
         return (Junk(ctx, (offset, junkend)), junkend)
 
     def createEntity(self, ctx, m):
         pre_comment = unicode(self.last_comment) if self.last_comment else ''
         self.last_comment = ''
         return Entity(ctx, pre_comment,
                       *[m.span(i) for i in xrange(6)])
 
+    @classmethod
+    def findDuplicates(cls, entities):
+        found = Counter(entity.key for entity in entities)
+        for entity_id, cnt in found.items():
+            if cnt > 1:
+                yield '{} occurs {} times'.format(entity_id, cnt)
+
 
 def getParser(path):
     for item in __constructors:
         if re.search(item[0], path):
             return item[1]
     raise UserWarning("Cannot find Parser")
 
 
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -157,16 +157,49 @@ eff = leffVal
                     'unchanged_w': 1
                 }},
              'details': {
                  'l10n.properties': [
                      {'obsoleteEntity': u'other'}]
                 }
              })
 
+    def test_duplicate(self):
+        self.assertTrue(os.path.isdir(self.tmp))
+        self.reference("""foo = fooVal
+bar = barVal
+eff = effVal
+foo = other val for foo""")
+        self.localized("""foo = localized
+bar = lBar
+eff = localized eff
+bar = duplicated bar
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.properties", ""),
+                   File(self.l10n, "l10n.properties", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.properties"))
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {'summary':
+                {None: {
+                    'errors': 1,
+                    'warnings': 1,
+                    'changed': 3,
+                    'changed_w': 6
+                }},
+             'details': {
+                 'l10n.properties': [
+                     {'warning': u'foo occurs 2 times'},
+                     {'error': u'bar occurs 2 times'}]
+                }
+             })
+        mergefile = mozpath.join(self.tmp, "merge", "l10n.properties")
+        self.assertFalse(os.path.isfile(mergefile))
+
 
 class TestDTD(unittest.TestCase, ContentMixin):
     extension = '.dtd'
 
     def setUp(self):
         self.maxDiff = None
         self.tmp = mkdtemp()
         os.mkdir(mozpath.join(self.tmp, "merge"))
@@ -686,11 +719,76 @@ bar = lBar
                 }
             }
         )
 
         # validate merge results
         mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
         self.assert_(not os.path.exists(mergepath))
 
+    def test_duplicate(self):
+        self.assertTrue(os.path.isdir(self.tmp))
+        self.reference("""foo = fooVal
+bar = barVal
+eff = effVal
+foo = other val for foo""")
+        self.localized("""foo = localized
+bar = lBar
+eff = localized eff
+bar = duplicated bar
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {'summary':
+                {None: {
+                    'errors': 1,
+                    'warnings': 1,
+                    'changed': 3,
+                    'changed_w': 6
+                }},
+             'details': {
+                 'l10n.ftl': [
+                     {'warning': u'foo occurs 2 times'},
+                     {'error': u'bar occurs 2 times'}]
+                }
+             })
+        mergefile = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assertFalse(os.path.isfile(mergefile))
+
+    def test_duplicate_attributes(self):
+        self.assertTrue(os.path.isdir(self.tmp))
+        self.reference("""foo = fooVal
+    .attr = good""")
+        self.localized("""foo = localized
+    .attr = not
+    .attr = so
+    .attr = good
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {'summary':
+                {None: {
+                    'warnings': 1,
+                    'changed': 1,
+                    'changed_w': 2
+                }},
+             'details': {
+                 'l10n.ftl': [
+                     {'warning':
+                      u'Attribute "attr" occurs 3 times '
+                      u'at line 4, column 5 for foo'
+                      }]
+                }
+             })
+        mergefile = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assertFalse(os.path.isfile(mergefile))
+
 
 if __name__ == '__main__':
     unittest.main()