bug 1371601, check for duplicate entities, and duplicate attributes in ftl, r=stas
For Entities, en-US duplicates are warnings, and l10n duplicates are errors.
For ftl attributes, ignore en-US duplicates altogether, and only warn
on duplicate attributes in l10n. That way, we don't end up skipping the translated
message.
MozReview-Commit-ID: 5fgkOtht43L
--- a/compare_locales/checks.py
+++ b/compare_locales/checks.py
@@ -1,13 +1,14 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
+from collections import Counter
from difflib import SequenceMatcher
from xml import sax
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from compare_locales.parser import DTDParser, PropertiesEntity
@@ -440,19 +441,31 @@ class FluentChecker(Checker):
if ref_entry.value is None and l10n_entry.value is not None:
yield ('error', l10n_entry.value.span.start,
'Obsolete value', 'fluent')
# verify that we're having the same set of attributes
ref_attr_names = set((attr.id.name for attr in ref_entry.attributes))
ref_pos = dict((attr.id.name, i)
for i, attr in enumerate(ref_entry.attributes))
- l10n_attr_names = set((attr.id.name for attr in l10n_entry.attributes))
+ l10n_attr_counts = \
+ Counter(attr.id.name for attr in l10n_entry.attributes)
+ l10n_attr_names = set(l10n_attr_counts)
l10n_pos = dict((attr.id.name, i)
for i, attr in enumerate(l10n_entry.attributes))
+ # check for duplicate Attributes
+ # only warn to not trigger a merge skip
+ for attr_name, cnt in l10n_attr_counts.items():
+ if cnt > 1:
+ yield (
+ 'warning',
+ l10n_entry.attributes[l10n_pos[attr_name]].span.start,
+ 'Attribute "{}" occurs {} times'.format(
+ attr_name, cnt),
+ 'fluent')
missing_attr_names = sorted(ref_attr_names - l10n_attr_names,
key=lambda k: ref_pos[k])
for attr_name in missing_attr_names:
yield ('error', l10n_entry.span.start,
'Missing attribute: ' + attr_name, 'fluent')
obsolete_attr_names = sorted(l10n_attr_names - ref_attr_names,
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -469,16 +469,20 @@ class ContentComparer:
ar.set_left(ref_list)
ar.set_right(l10n_list)
report = missing = obsolete = changed = unchanged = keys = 0
missing_w = changed_w = unchanged_w = 0 # word stats
missings = []
skips = []
checker = getChecker(l10n,
reference=ref_entities, extra_tests=extra_tests)
+ for msg in p.findDuplicates(ref_entities):
+ self.notify('warning', l10n, msg)
+ for msg in p.findDuplicates(l10n_entities):
+ self.notify('error', l10n, msg)
for action, entity_id in ar:
if action == 'delete':
# missing entity
if isinstance(ref_entities[ref_map[entity_id]], parser.Junk):
self.notify('warning', l10n, 'Parser error in en-US')
continue
_rv = self.notify('missingEntity', l10n, entity_id)
if _rv == "ignore":
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -1,15 +1,16 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
import bisect
import codecs
+from collections import Counter
import logging
from fluent.syntax import FluentParser as FTLParser
from fluent.syntax import ast as ftl
__constructors = []
@@ -305,16 +306,23 @@ class Parser(object):
return (Junk(ctx, (offset, junkend)), junkend)
def createEntity(self, ctx, m):
pre_comment = unicode(self.last_comment) if self.last_comment else ''
self.last_comment = ''
return Entity(ctx, pre_comment,
*[m.span(i) for i in xrange(6)])
+ @classmethod
+ def findDuplicates(cls, entities):
+ found = Counter(entity.key for entity in entities)
+ for entity_id, cnt in found.items():
+ if cnt > 1:
+ yield '{} occurs {} times'.format(entity_id, cnt)
+
def getParser(path):
for item in __constructors:
if re.search(item[0], path):
return item[1]
raise UserWarning("Cannot find Parser")
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -157,16 +157,49 @@ eff = leffVal
'unchanged_w': 1
}},
'details': {
'l10n.properties': [
{'obsoleteEntity': u'other'}]
}
})
+ def test_duplicate(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+bar = barVal
+eff = effVal
+foo = other val for foo""")
+ self.localized("""foo = localized
+bar = lBar
+eff = localized eff
+bar = duplicated bar
+""")
+ cc = ContentComparer([Observer()])
+ cc.compare(File(self.ref, "en-reference.properties", ""),
+ File(self.l10n, "l10n.properties", ""),
+ mozpath.join(self.tmp, "merge", "l10n.properties"))
+ self.assertDictEqual(
+ cc.observers[0].toJSON(),
+ {'summary':
+ {None: {
+ 'errors': 1,
+ 'warnings': 1,
+ 'changed': 3,
+ 'changed_w': 6
+ }},
+ 'details': {
+ 'l10n.properties': [
+ {'warning': u'foo occurs 2 times'},
+ {'error': u'bar occurs 2 times'}]
+ }
+ })
+ mergefile = mozpath.join(self.tmp, "merge", "l10n.properties")
+ self.assertFalse(os.path.isfile(mergefile))
+
class TestDTD(unittest.TestCase, ContentMixin):
extension = '.dtd'
def setUp(self):
self.maxDiff = None
self.tmp = mkdtemp()
os.mkdir(mozpath.join(self.tmp, "merge"))
@@ -686,11 +719,76 @@ bar = lBar
}
}
)
# validate merge results
mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
self.assert_(not os.path.exists(mergepath))
+ def test_duplicate(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+bar = barVal
+eff = effVal
+foo = other val for foo""")
+ self.localized("""foo = localized
+bar = lBar
+eff = localized eff
+bar = duplicated bar
+""")
+ cc = ContentComparer([Observer()])
+ cc.compare(File(self.ref, "en-reference.ftl", ""),
+ File(self.l10n, "l10n.ftl", ""),
+ mozpath.join(self.tmp, "merge", "l10n.ftl"))
+ self.assertDictEqual(
+ cc.observers[0].toJSON(),
+ {'summary':
+ {None: {
+ 'errors': 1,
+ 'warnings': 1,
+ 'changed': 3,
+ 'changed_w': 6
+ }},
+ 'details': {
+ 'l10n.ftl': [
+ {'warning': u'foo occurs 2 times'},
+ {'error': u'bar occurs 2 times'}]
+ }
+ })
+ mergefile = mozpath.join(self.tmp, "merge", "l10n.ftl")
+ self.assertFalse(os.path.isfile(mergefile))
+
+ def test_duplicate_attributes(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+ .attr = good""")
+ self.localized("""foo = localized
+ .attr = not
+ .attr = so
+ .attr = good
+""")
+ cc = ContentComparer([Observer()])
+ cc.compare(File(self.ref, "en-reference.ftl", ""),
+ File(self.l10n, "l10n.ftl", ""),
+ mozpath.join(self.tmp, "merge", "l10n.ftl"))
+ self.assertDictEqual(
+ cc.observers[0].toJSON(),
+ {'summary':
+ {None: {
+ 'warnings': 1,
+ 'changed': 1,
+ 'changed_w': 2
+ }},
+ 'details': {
+ 'l10n.ftl': [
+ {'warning':
+ u'Attribute "attr" occurs 3 times '
+ u'at line 4, column 5 for foo'
+ }]
+ }
+ })
+ mergefile = mozpath.join(self.tmp, "merge", "l10n.ftl")
+ self.assertFalse(os.path.isfile(mergefile))
+
if __name__ == '__main__':
unittest.main()