bug 1292557, update compare-locales to 1.1, r?flod draft
authorAxel Hecht <axel@pike.org>
Fri, 05 Aug 2016 15:48:21 +0200
changeset 397205 b36f343426b4735c729f900a8c90b97d7af87a42
parent 397204 d320ef56876f52db9bc0eb79554c7332d4793769
child 527401 a5a033306c31f23368ef011cb58a173fb8cfec11
push id25236
push useraxel@mozilla.com
push dateFri, 05 Aug 2016 13:49:43 +0000
reviewersflod
bugs1292557, 1238150, 1292215
milestone51.0a1
bug 1292557, update compare-locales to 1.1, r?flod Notable changes: Bug 1238150 - Don't consider trailing comments as junk Bug 1292215 - Drop Junk entities from merged content, more tests MozReview-Commit-ID: IkJMxBByHxK
python/compare-locales/compare_locales/__init__.py
python/compare-locales/compare_locales/compare.py
python/compare-locales/compare_locales/parser.py
python/compare-locales/compare_locales/tests/__init__.py
python/compare-locales/compare_locales/tests/test_compare.py
python/compare-locales/compare_locales/tests/test_ini.py
python/compare-locales/compare_locales/tests/test_merge.py
--- a/python/compare-locales/compare_locales/__init__.py
+++ b/python/compare-locales/compare_locales/__init__.py
@@ -1,1 +1,1 @@
-version = "1.0"
+version = "1.1"
--- a/python/compare-locales/compare_locales/compare.py
+++ b/python/compare-locales/compare_locales/compare.py
@@ -393,17 +393,18 @@ class ContentComparer:
             shutil.copyfile(ref_file.fullpath, outfile)
             print "copied reference to " + outfile
             return
         if skips:
             # skips come in ordered by key name, we need them in file order
             skips.sort(key=lambda s: s.span[0])
         trailing = (['\n'] +
                     [ref_entities[ref_map[key]].all for key in missing] +
-                    [ref_entities[ref_map[skip.key]].all for skip in skips])
+                    [ref_entities[ref_map[skip.key]].all for skip in skips
+                     if not isinstance(skip, parser.Junk)])
         if skips:
             # we need to skip a few errornous blocks in the input, copy by hand
             f = codecs.open(outfile, 'wb', p.encoding)
             offset = 0
             for skip in skips:
                 chunk = skip.span
                 f.write(p.contents[offset:chunk[0]])
                 offset = chunk[1]
@@ -498,16 +499,18 @@ class ContentComparer:
             elif action == 'add':
                 # obsolete entity or junk
                 if isinstance(l10n_entities[l10n_map[item_or_pair]],
                               parser.Junk):
                     junk = l10n_entities[l10n_map[item_or_pair]]
                     params = (junk.val,) + junk.span
                     self.notify('error', l10n,
                                 'Unparsed content "%s" at %d-%d' % params)
+                    if self.merge_stage is not None:
+                        skips.append(junk)
                 elif self.notify('obsoleteEntity', l10n,
                                  item_or_pair) != 'ignore':
                     obsolete += 1
             else:
                 # entity found in both ref and l10n, check for changed
                 entity = item_or_pair[0]
                 refent = ref[0][ref[1][entity]]
                 l10nent = l10n_entities[l10n_map[entity]]
--- a/python/compare-locales/compare_locales/parser.py
+++ b/python/compare-locales/compare_locales/parser.py
@@ -168,17 +168,17 @@ class Parser:
         return (header, offset)
 
     def getEntity(self, contents, offset):
         m = self.reKey.match(contents, offset)
         if m:
             offset = m.end()
             entity = self.createEntity(contents, m)
             return (entity, offset)
-        # first check if footer has a non-empy match,
+        # first check if footer has a non-empty match,
         # 'cause then we don't find junk
         m = self.reFooter.match(contents, offset)
         if m and m.end() > offset:
             return (None, offset)
         m = self.reKey.search(contents, offset)
         if m:
             # we didn't match, but search, so there's junk between offset
             # and start. We'll match() on the next turn
@@ -370,17 +370,17 @@ class IniParser(Parser):
     whitespace*
     #comment
     string=value
     ...
     '''
     def __init__(self):
         self.reHeader = re.compile('^((?:\s*|[;#].*)\n)*\[.+?\]\n', re.M)
         self.reKey = re.compile('(\s*)((?:[;#].*\n\s*)*)((.+?)=(.*))(\n?)')
-        self.reFooter = re.compile('\s*')
+        self.reFooter = re.compile('\s*([;#].*\s*)*$')
         Parser.__init__(self)
 
 
 DECL, COMMENT, START, END, CONTENT = range(5)
 
 
 class BookmarksParserInner(HTMLParser):
 
--- a/python/compare-locales/compare_locales/tests/__init__.py
+++ b/python/compare-locales/compare_locales/tests/__init__.py
@@ -35,15 +35,15 @@ class ParserTestMixin():
     def _test(self, content, refs):
         '''Helper to test the parser.
         Compares the result of parsing content with the given list
         of reference keys and values.
         '''
         self.parser.readContents(content)
         entities = [entity for entity in self.parser]
         for entity, ref in izip_longest(entities, refs):
-            self.assertTrue(entity, 'excess reference entitiy')
+            self.assertTrue(entity, 'excess reference entity')
             self.assertTrue(ref, 'excess parsed entity')
             self.assertEqual(entity.val, ref[1])
             if ref[0].startswith('_junk'):
                 self.assertTrue(re.match(ref[0], entity.key))
             else:
                 self.assertEqual(entity.key, ref[0])
new file mode 100644
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_compare.py
@@ -0,0 +1,90 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales import compare
+
+
+class TestTree(unittest.TestCase):
+    '''Test the Tree utility class
+
+    Tree value classes need to be in-place editable
+    '''
+
+    def test_empty_dict(self):
+        tree = compare.Tree(dict)
+        self.assertEqual(list(tree.getContent()), [])
+        self.assertDictEqual(
+            tree.toJSON(),
+            {}
+        )
+
+    def test_disjoint_dict(self):
+        tree = compare.Tree(dict)
+        tree['one/entry']['leaf'] = 1
+        tree['two/other']['leaf'] = 2
+        self.assertEqual(
+            list(tree.getContent()),
+            [
+                (0, 'key', ('one', 'entry')),
+                (1, 'value', {'leaf': 1}),
+                (0, 'key', ('two', 'other')),
+                (1, 'value', {'leaf': 2})
+            ]
+        )
+        self.assertDictEqual(
+            tree.toJSON(),
+            {
+                'children': [
+                    ('one/entry',
+                     {'value': {'leaf': 1}}
+                     ),
+                    ('two/other',
+                     {'value': {'leaf': 2}}
+                     )
+                ]
+            }
+        )
+        self.assertMultiLineEqual(
+            str(tree),
+            '''\
+one/entry
+    {'leaf': 1}
+two/other
+    {'leaf': 2}\
+'''
+        )
+
+    def test_overlapping_dict(self):
+        tree = compare.Tree(dict)
+        tree['one/entry']['leaf'] = 1
+        tree['one/other']['leaf'] = 2
+        self.assertEqual(
+            list(tree.getContent()),
+            [
+                (0, 'key', ('one',)),
+                (1, 'key', ('entry',)),
+                (2, 'value', {'leaf': 1}),
+                (1, 'key', ('other',)),
+                (2, 'value', {'leaf': 2})
+            ]
+        )
+        self.assertDictEqual(
+            tree.toJSON(),
+            {
+                'children': [
+                    ('one', {
+                        'children': [
+                            ('entry',
+                             {'value': {'leaf': 1}}
+                             ),
+                            ('other',
+                             {'value': {'leaf': 2}}
+                             )
+                        ]
+                    })
+                ]
+            }
+        )
--- a/python/compare-locales/compare_locales/tests/test_ini.py
+++ b/python/compare-locales/compare_locales/tests/test_ini.py
@@ -46,20 +46,70 @@ TitleText=Some Title
 
 ; more comments
 
 [Strings]
 TitleText=Some Title
 ''', (('TitleText', 'Some Title'),))
         self.assert_('MPL' in self.parser.header)
 
-    def testMPL2_Junk(self):
+    def testMPL2_JunkBeforeCategory(self):
         self._test(mpl2 + '''\
 Junk
 [Strings]
 TitleText=Some Title
 ''', (('_junk_\\d+_0-213$', mpl2 + '''\
 Junk
 [Strings]'''), ('TitleText', 'Some Title')))
         self.assert_('MPL' not in self.parser.header)
 
+    def test_TrailingComment(self):
+        self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+;Stray trailing comment
+''', (('TitleText', 'Some Title'),))
+        self.assert_('MPL' in self.parser.header)
+
+    def test_SpacedTrailingComments(self):
+        self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+
+;Stray trailing comment
+;Second stray comment
+
+''', (('TitleText', 'Some Title'),))
+        self.assert_('MPL' in self.parser.header)
+
+    def test_TrailingCommentsAndJunk(self):
+        self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+
+;Stray trailing comment
+Junk
+;Second stray comment
+
+''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-284$', '''\
+
+;Stray trailing comment
+Junk
+;Second stray comment
+
+''')))
+        self.assert_('MPL' in self.parser.header)
+
+    def test_JunkInbetweenEntries(self):
+        self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+
+Junk
+
+Good=other string
+''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-236$', '''\
+
+Junk'''), ('Good', 'other string')))
+        self.assert_('MPL' in self.parser.header)
+
 if __name__ == '__main__':
     unittest.main()
--- a/python/compare-locales/compare_locales/tests/test_merge.py
+++ b/python/compare-locales/compare_locales/tests/test_merge.py
@@ -7,99 +7,259 @@ import os
 from tempfile import mkdtemp
 import shutil
 
 from compare_locales.parser import getParser
 from compare_locales.paths import File
 from compare_locales.compare import ContentComparer
 
 
-class TestProperties(unittest.TestCase):
+class ContentMixin(object):
+    maxDiff = None  # we got big dictionaries to compare
+    extension = None  # OVERLOAD
+
+    def reference(self, content):
+        self.ref = os.path.join(self.tmp, "en-reference" + self.extension)
+        open(self.ref, "w").write(content)
+
+    def localized(self, content):
+        self.l10n = os.path.join(self.tmp, "l10n" + self.extension)
+        open(self.l10n, "w").write(content)
+
+
+class TestProperties(unittest.TestCase, ContentMixin):
+    extension = '.properties'
 
     def setUp(self):
         self.tmp = mkdtemp()
         os.mkdir(os.path.join(self.tmp, "merge"))
-        self.ref = os.path.join(self.tmp, "en-reference.properties")
-        open(self.ref, "w").write("""foo = fooVal
-bar = barVal
-eff = effVal""")
 
     def tearDown(self):
         shutil.rmtree(self.tmp)
         del self.tmp
 
     def testGood(self):
         self.assertTrue(os.path.isdir(self.tmp))
-        l10n = os.path.join(self.tmp, "l10n.properties")
-        open(l10n, "w").write("""foo = lFoo
+        self.reference("""foo = fooVal
+bar = barVal
+eff = effVal""")
+        self.localized("""foo = lFoo
 bar = lBar
 eff = lEff
 """)
         cc = ContentComparer()
         cc.set_merge_stage(os.path.join(self.tmp, "merge"))
         cc.compare(File(self.ref, "en-reference.properties", ""),
-                   File(l10n, "l10n.properties", ""))
-        print cc.observer.serialize()
+                   File(self.l10n, "l10n.properties", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 3
+                }},
+             'details': {}
+             }
+        )
+        self.assert_(not os.path.exists(os.path.join(cc.merge_stage,
+                                                     'l10n.properties')))
 
     def testMissing(self):
         self.assertTrue(os.path.isdir(self.tmp))
-        l10n = os.path.join(self.tmp, "l10n.properties")
-        open(l10n, "w").write("""bar = lBar
+        self.reference("""foo = fooVal
+bar = barVal
+eff = effVal""")
+        self.localized("""bar = lBar
 """)
         cc = ContentComparer()
         cc.set_merge_stage(os.path.join(self.tmp, "merge"))
         cc.compare(File(self.ref, "en-reference.properties", ""),
-                   File(l10n, "l10n.properties", ""))
-        print cc.observer.serialize()
+                   File(self.l10n, "l10n.properties", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 1, 'missing': 2
+                }},
+             'details': {
+                 'children': [
+                     ('l10n.properties',
+                         {'value': {'missingEntity': [u'eff', u'foo']}}
+                      )
+                 ]}
+             }
+        )
         mergefile = os.path.join(self.tmp, "merge", "l10n.properties")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual(map(lambda e: e.key,  m), ["bar", "eff", "foo"])
 
+    def testError(self):
+        self.assertTrue(os.path.isdir(self.tmp))
+        self.reference("""foo = fooVal
+bar = %d barVal
+eff = effVal""")
+        self.localized("""bar = %S lBar
+eff = leffVal
+""")
+        cc = ContentComparer()
+        cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+        cc.compare(File(self.ref, "en-reference.properties", ""),
+                   File(self.l10n, "l10n.properties", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 2, 'errors': 1, 'missing': 1
+                }},
+             'details': {
+                 'children': [
+                     ('l10n.properties',
+                         {'value': {
+                          'error': [u'argument 1 `S` should be `d` '
+                                    u'at line 1, column 6 for bar'],
+                          'missingEntity': [u'foo']}}
+                      )
+                 ]}
+             }
+        )
+        mergefile = os.path.join(self.tmp, "merge", "l10n.properties")
+        self.assertTrue(os.path.isfile(mergefile))
+        p = getParser(mergefile)
+        p.readFile(mergefile)
+        [m, n] = p.parse()
+        self.assertEqual([e.key for e in m], ["eff", "foo", "bar"])
+        self.assertEqual(m[n['bar']].val, '%d barVal')
 
-class TestDTD(unittest.TestCase):
+    def testObsolete(self):
+        self.assertTrue(os.path.isdir(self.tmp))
+        self.reference("""foo = fooVal
+eff = effVal""")
+        self.localized("""foo = fooVal
+other = obsolete
+eff = leffVal
+""")
+        cc = ContentComparer()
+        cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+        cc.compare(File(self.ref, "en-reference.properties", ""),
+                   File(self.l10n, "l10n.properties", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 1, 'obsolete': 1, 'unchanged': 1
+                }},
+             'details': {
+                 'children': [
+                     ('l10n.properties',
+                         {'value': {'obsoleteEntity': [u'other']}})]},
+             }
+        )
+
+
+class TestDTD(unittest.TestCase, ContentMixin):
+    extension = '.dtd'
 
     def setUp(self):
         self.tmp = mkdtemp()
         os.mkdir(os.path.join(self.tmp, "merge"))
-        self.ref = os.path.join(self.tmp, "en-reference.dtd")
-        open(self.ref, "w").write("""<!ENTITY foo 'fooVal'>
-<!ENTITY bar 'barVal'>
-<!ENTITY eff 'effVal'>""")
 
     def tearDown(self):
         shutil.rmtree(self.tmp)
         del self.tmp
 
     def testGood(self):
         self.assertTrue(os.path.isdir(self.tmp))
-        l10n = os.path.join(self.tmp, "l10n.dtd")
-        open(l10n, "w").write("""<!ENTITY foo 'lFoo'>
+        self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'barVal'>
+<!ENTITY eff 'effVal'>""")
+        self.localized("""<!ENTITY foo 'lFoo'>
 <!ENTITY bar 'lBar'>
 <!ENTITY eff 'lEff'>
 """)
         cc = ContentComparer()
         cc.set_merge_stage(os.path.join(self.tmp, "merge"))
         cc.compare(File(self.ref, "en-reference.dtd", ""),
-                   File(l10n, "l10n.dtd", ""))
-        print cc.observer.serialize()
+                   File(self.l10n, "l10n.dtd", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 3
+                }},
+             'details': {}
+             }
+        )
+        self.assert_(
+            not os.path.exists(os.path.join(cc.merge_stage, 'l10n.dtd')))
 
     def testMissing(self):
         self.assertTrue(os.path.isdir(self.tmp))
-        l10n = os.path.join(self.tmp, "l10n.dtd")
-        open(l10n, "w").write("""<!ENTITY bar 'lBar'>
+        self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'barVal'>
+<!ENTITY eff 'effVal'>""")
+        self.localized("""<!ENTITY bar 'lBar'>
 """)
         cc = ContentComparer()
         cc.set_merge_stage(os.path.join(self.tmp, "merge"))
         cc.compare(File(self.ref, "en-reference.dtd", ""),
-                   File(l10n, "l10n.dtd", ""))
-        print cc.observer.serialize()
+                   File(self.l10n, "l10n.dtd", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 1, 'missing': 2
+                }},
+             'details': {
+                 'children': [
+                     ('l10n.dtd',
+                         {'value': {'missingEntity': [u'eff', u'foo']}}
+                      )
+                 ]}
+             }
+        )
         mergefile = os.path.join(self.tmp, "merge", "l10n.dtd")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual(map(lambda e: e.key,  m), ["bar", "eff", "foo"])
 
+    def testJunk(self):
+        self.assertTrue(os.path.isdir(self.tmp))
+        self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'barVal'>
+<!ENTITY eff 'effVal'>""")
+        self.localized("""<!ENTITY foo 'fooVal'>
+<!ENTY bar 'gimmick'>
+<!ENTITY eff 'effVal'>
+""")
+        cc = ContentComparer()
+        cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+        cc.compare(File(self.ref, "en-reference.dtd", ""),
+                   File(self.l10n, "l10n.dtd", ""))
+        self.assertDictEqual(
+            cc.observer.toJSON(),
+            {'summary':
+                {None: {
+                    'errors': 1, 'missing': 1, 'unchanged': 2
+                }},
+             'details': {
+                 'children': [
+                     ('l10n.dtd',
+                         {'value': {
+                             'error': [u'Unparsed content "<!ENTY bar '
+                                       u'\'gimmick\'>" at 23-44'],
+                             'missingEntity': [u'bar']}}
+                      )
+                 ]}
+             }
+        )
+        mergefile = os.path.join(self.tmp, "merge", "l10n.dtd")
+        self.assertTrue(os.path.isfile(mergefile))
+        p = getParser(mergefile)
+        p.readFile(mergefile)
+        [m, n] = p.parse()
+        self.assertEqual(map(lambda e: e.key,  m), ["foo", "eff", "bar"])
+
 if __name__ == '__main__':
     unittest.main()