bug 1371342, don't count parse errors in en-US as missing strings, warn on them, r=stas
Improve our handling of errors in the reference files. Don't count
Junk entries as missing strings, and add a warning about them.
We already warned about parsing errors inside DTD values, but had no
test for that; this patch adds one.
MozReview-Commit-ID: 5pLs0xEyE6h
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -458,16 +458,19 @@ class ContentComparer:
report = missing = obsolete = changed = unchanged = keys = 0
missing_w = changed_w = unchanged_w = 0 # word stats
missings = []
skips = []
checker = getChecker(l10n, reference=ref[0], extra_tests=extra_tests)
for action, entity in ar:
if action == 'delete':
# missing entity
+ if isinstance(ref[0][ref[1][entity]], parser.Junk):
+ self.notify('warning', l10n, 'Parser error in en-US')
+ continue
_rv = self.notify('missingEntity', l10n, entity)
if _rv == "ignore":
continue
if _rv == "error":
# only add to missing entities for l10n-merge on error,
# not report
missings.append(entity)
missing += 1
@@ -558,17 +561,19 @@ class ContentComparer:
except UserWarning:
return
try:
p.readContents(f.getContents())
entities, map = p.parse()
except Exception, e:
self.notify('error', f, str(e))
return
- self.updateStats(missing, {'missingInFiles': len(map)})
+ # strip parse errors
+ entities = [e for e in entities if not isinstance(e, parser.Junk)]
+ self.updateStats(missing, {'missingInFiles': len(entities)})
missing_w = 0
for e in entities:
missing_w += self.countWords(e.val)
self.updateStats(missing, {'missing_w': missing_w})
def doUnchanged(self, entity):
# overload this if needed
pass
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -268,11 +268,66 @@ class TestDTD(unittest.TestCase, Content
})
mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
self.assertTrue(os.path.isfile(mergefile))
p = getParser(mergefile)
p.readFile(mergefile)
[m, n] = p.parse()
self.assertEqual(map(lambda e: e.key, m), ["foo", "eff", "bar"])
+ def test_reference_junk(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""<!ENTITY foo 'fooVal'>
+<!ENT bar 'bad val'>
+<!ENTITY eff 'effVal'>""")
+ self.localized("""<!ENTITY foo 'fooVal'>
+<!ENTITY eff 'effVal'>
+""")
+ cc = ContentComparer([Observer()])
+ cc.compare(File(self.ref, "en-reference.dtd", ""),
+ File(self.l10n, "l10n.dtd", ""),
+ mozpath.join(self.tmp, "merge", "l10n.dtd"))
+ self.assertDictEqual(
+ cc.observers[0].toJSON(),
+ {'summary':
+ {None: {
+ 'warnings': 1,
+ 'unchanged': 2,
+ 'unchanged_w': 2
+ }},
+ 'details': {
+ 'l10n.dtd': [
+ {'warning': 'Parser error in en-US'}]
+ }
+ })
+
+ def test_reference_xml_error(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'bad &val'>
+<!ENTITY eff 'effVal'>""")
+ self.localized("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'good val'>
+<!ENTITY eff 'effVal'>
+""")
+ cc = ContentComparer([Observer()])
+ cc.compare(File(self.ref, "en-reference.dtd", ""),
+ File(self.l10n, "l10n.dtd", ""),
+ mozpath.join(self.tmp, "merge", "l10n.dtd"))
+ self.assertDictEqual(
+ cc.observers[0].toJSON(),
+ {'summary':
+ {None: {
+ 'warnings': 1,
+ 'unchanged': 2,
+ 'unchanged_w': 2,
+ 'changed': 1,
+ 'changed_w': 2
+ }},
+ 'details': {
+ 'l10n.dtd': [
+ {'warning': u"can't parse en-US value at line 1, column 0 for bar"}]
+ }
+ })
+
if __name__ == '__main__':
unittest.main()