bug 1368449, data storage and iteration, r=mathjazz
author Axel Hecht <axel@pike.org>
date Mon, 29 May 2017 14:19:52 +0200
changeset 249 9a8edbd6e4577263427086388502d4a413b21fae
parent 245 8b6af885adb94191f241f2c75a0edb8927bc20bb
child 250 6d195f94511e87a2d75508891cdf3139a6ac0d0f
child 251 c2653a575e552bc33ecb462f177dd163afd89144
child 252 694077034d1327943f802f17543285425dc4de3d
child 255 47c671572cdaea58bd33bb8cf0a7c6842fc78137
push id 57
push user axel@mozilla.com
push date Tue, 30 May 2017 09:04:31 +0000
reviewers mathjazz
bugs 1368449, 1368444
bug 1368449, data storage and iteration, r=mathjazz

This changes the data storage from a dict like

  { 'missingEntity': [ 'onekey', 'zoom' ], 'obsoleteEntity': [ 'more' ] }

to a list like

  [ {'missingEntity': 'onekey'}, {'obsoleteEntity': 'more'}, {'missingEntity': 'zoom'} ]

With this patch there is only a single location where we sort, which is going to be good for bug 1368444.

I had to rip out the use of SequenceMatcher, because it doesn't preserve the order of the keys: it always issues "delete" first and then "add", regardless of whether the added key would come first or second. And while I was at it, I dropped the whole construct of yielding the equal key twice. I only did that to mimic SequenceMatcher, and I don't have to. It makes the code easier to read, too.

MozReview-Commit-ID: HUWnnBvzyAb
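As a quick illustration (not part of the patch), the reworked AddRemove iterates the union of both key sets in one sorted pass, tagging each key as 'equal', 'delete', or 'add'; the key lists here are made up:

  ar = AddRemove()
  ar.set_left(['onekey', 'more', 'zoom'])   # e.g. reference keys
  ar.set_right(['more', 'other'])           # e.g. l10n keys
  print(list(ar))
  # [('equal', 'more'), ('delete', 'onekey'), ('add', 'other'), ('delete', 'zoom')]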
compare_locales/compare.py
compare_locales/tests/test_compare.py
compare_locales/tests/test_merge.py
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -3,17 +3,16 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 'Mozilla l10n compare locales tool'
 
 import codecs
 import os
 import shutil
 import re
-from difflib import SequenceMatcher
 from collections import defaultdict
 
 try:
     from json import dumps
 except:
     from simplejson import dumps
 
 from compare_locales import parser
@@ -87,77 +86,63 @@ class Tree(object):
             for child in self.branches[key].getContent(depth + 1):
                 yield child
 
     def toJSON(self):
         '''
         Returns this Tree as a JSON-able tree of hashes.
         Only the values need to take care that they're JSON-able.
         '''
-        json = {}
-        keys = self.branches.keys()
-        keys.sort()
         if self.value is not None:
-            json['value'] = self.value
-        children = [('/'.join(key), self.branches[key].toJSON())
-                    for key in keys]
-        if children:
-            json['children'] = children
-        return json
+            return self.value
+        return dict(('/'.join(key), self.branches[key].toJSON())
+                    for key in self.branches.keys())
 
     def getStrRows(self):
         def tostr(t):
             if t[1] == 'key':
                 return self.indent * t[0] + '/'.join(t[2])
             return self.indent * (t[0] + 1) + str(t[2])
 
         return map(tostr, self.getContent())
 
     def __str__(self):
         return '\n'.join(self.getStrRows())
 
 
-class AddRemove(SequenceMatcher):
-    def __init__(self):
-        SequenceMatcher.__init__(self, None, None, None)
+class AddRemove(object):
+    def __init__(self, key=None):
+        self.left = self.right = None
+        self.key = key
 
     def set_left(self, left):
-        if not isinstance(left, list):
-            left = [l for l in left]
-        self.set_seq1(left)
+        if not isinstance(left, set):
+            left = set(l for l in left)
+        self.left = left
 
     def set_right(self, right):
-        if not isinstance(right, list):
-            right = [l for l in right]
-        self.set_seq2(right)
+        if not isinstance(right, set):
+            right = set(l for l in right)
+        self.right = right
 
     def __iter__(self):
-        for tag, i1, i2, j1, j2 in self.get_opcodes():
-            if tag == 'equal':
-                for pair in zip(self.a[i1:i2], self.b[j1:j2]):
-                    yield ('equal', pair)
-            elif tag == 'delete':
-                for item in self.a[i1:i2]:
-                    yield ('delete', item)
-            elif tag == 'insert':
-                for item in self.b[j1:j2]:
-                    yield ('add', item)
+        for item in sorted(self.left | self.right, key=self.key):
+            if item in self.left and item in self.right:
+                yield ('equal', item)
+            elif item in self.left:
+                yield ('delete', item)
             else:
-                # tag == 'replace'
-                for item in self.a[i1:i2]:
-                    yield ('delete', item)
-                for item in self.b[j1:j2]:
-                    yield ('add', item)
+                yield ('add', item)
 
 
 class Observer(object):
 
     def __init__(self, filter=None, file_stats=False):
         self.summary = defaultdict(lambda: defaultdict(int))
-        self.details = Tree(dict)
+        self.details = Tree(list)
         self.filter = filter
         self.file_stats = None
         if file_stats:
             self.file_stats = defaultdict(lambda: defaultdict(dict))
 
     # support pickling
     def __getstate__(self):
         state = dict(summary=self._dictify(self.summary), details=self.details)
@@ -200,54 +185,41 @@ class Observer(object):
             return
         for category, value in stats.iteritems():
             self.summary[file.locale][category] += value
         if self.file_stats is None:
             return
         if 'missingInFiles' in stats:
             # keep track of how many strings are in a missing file
             # we got the {'missingFile': 'error'} from the notify pass
-            self.details[file]['strings'] = stats['missingInFiles']
+            self.details[file].append({'count': stats['missingInFiles']})
             # missingInFiles should just be "missing" in file stats
             self.file_stats[file.locale][file.localpath]['missing'] = \
                 stats['missingInFiles']
             return  # there are no other stats for missing files
         self.file_stats[file.locale][file.localpath].update(stats)
 
     def notify(self, category, file, data):
         rv = 'error'
         if category in ['missingFile', 'obsoleteFile']:
             if self.filter is not None:
                 rv = self.filter(file)
             if rv != "ignore":
-                self.details[file][category] = rv
+                self.details[file].append({category: rv})
             return rv
         if category in ['missingEntity', 'obsoleteEntity']:
             if self.filter is not None:
                 rv = self.filter(file, data)
             if rv == "ignore":
                 return rv
-            v = self.details[file]
-            try:
-                v[category].append(data)
-            except KeyError:
-                v[category] = [data]
+            self.details[file].append({category: data})
             return rv
-        if category == 'error':
-            try:
-                self.details[file][category].append(data)
-            except KeyError:
-                self.details[file][category] = [data]
-            self.summary[file.locale]['errors'] += 1
-        elif category == 'warning':
-            try:
-                self.details[file][category].append(data)
-            except KeyError:
-                self.details[file][category] = [data]
-            self.summary[file.locale]['warnings'] += 1
+        if category in ('error', 'warning'):
+            self.details[file].append({category: data})
+            self.summary[file.locale][category + 's'] += 1
         return rv
 
     def toExhibit(self):
         items = []
         for locale in sorted(self.summary.iterkeys()):
             summary = self.summary[locale]
             if locale is not None:
                 item = {'id': 'xxx/' + locale,
@@ -304,36 +276,29 @@ class Observer(object):
         if type == "json":
             return dumps(self.toJSON())
 
         def tostr(t):
             if t[1] == 'key':
                 return '  ' * t[0] + '/'.join(t[2])
             o = []
             indent = '  ' * (t[0] + 1)
-            if 'error' in t[2]:
-                o += [indent + 'ERROR: ' + e for e in t[2]['error']]
-            if 'warning' in t[2]:
-                o += [indent + 'WARNING: ' + e for e in t[2]['warning']]
-            if 'missingEntity' in t[2] or 'obsoleteEntity' in t[2]:
-                missingEntities = ('missingEntity' in t[2] and
-                                   t[2]['missingEntity']) or []
-                obsoleteEntities = ('obsoleteEntity' in t[2] and
-                                    t[2]['obsoleteEntity']) or []
-                entities = missingEntities + obsoleteEntities
-                entities.sort()
-                for entity in entities:
-                    op = '+'
-                    if entity in obsoleteEntities:
-                        op = '-'
-                    o.append(indent + op + entity)
-            elif 'missingFile' in t[2]:
-                o.append(indent + '// add and localize this file')
-            elif 'obsoleteFile' in t[2]:
-                o.append(indent + '// remove this file')
+            for item in t[2]:
+                if 'error' in item:
+                    o += [indent + 'ERROR: ' + item['error']]
+                elif 'warning' in item:
+                    o += [indent + 'WARNING: ' + item['warning']]
+                elif 'missingEntity' in item:
+                    o += [indent + '+' + item['missingEntity']]
+                elif 'obsoleteEntity' in item:
+                    o += [indent + '-' + item['obsoleteEntity']]
+                elif 'missingFile' in item:
+                    o.append(indent + '// add and localize this file')
+                elif 'obsoleteFile' in item:
+                    o.append(indent + '// remove this file')
             return '\n'.join(o)
 
         out = []
         for locale, summary in sorted(self.summary.iteritems()):
             if locale is not None:
                 out.append(locale + ':')
             out += [k + ': ' + str(v) for k, v in sorted(summary.iteritems())]
             total = sum([summary[k]
@@ -478,49 +443,48 @@ class ContentComparer:
         ar = AddRemove()
         ar.set_left(ref_list)
         ar.set_right(l10n_list)
         report = missing = obsolete = changed = unchanged = keys = 0
         missing_w = changed_w = unchanged_w = 0  # word stats
         missings = []
         skips = []
         checker = getChecker(l10n, reference=ref[0], extra_tests=extra_tests)
-        for action, item_or_pair in ar:
+        for action, entity in ar:
             if action == 'delete':
                 # missing entity
-                _rv = self.notify('missingEntity', l10n, item_or_pair)
+                _rv = self.notify('missingEntity', l10n, entity)
                 if _rv == "ignore":
                     continue
                 if _rv == "error":
                     # only add to missing entities for l10n-merge on error,
                     # not report
-                    missings.append(item_or_pair)
+                    missings.append(entity)
                     missing += 1
-                    refent = ref[0][ref[1][item_or_pair]]
+                    refent = ref[0][ref[1][entity]]
                     missing_w += self.countWords(refent.val)
                 else:
                     # just report
                     report += 1
             elif action == 'add':
                 # obsolete entity or junk
-                if isinstance(l10n_entities[l10n_map[item_or_pair]],
+                if isinstance(l10n_entities[l10n_map[entity]],
                               parser.Junk):
-                    junk = l10n_entities[l10n_map[item_or_pair]]
+                    junk = l10n_entities[l10n_map[entity]]
                     params = (junk.val,) + junk.position() + junk.position(-1)
                     self.notify('error', l10n,
                                 'Unparsed content "%s" from line %d column %d'
                                 ' to line %d column %d' % params)
                     if self.merge_stage is not None:
                         skips.append(junk)
                 elif self.notify('obsoleteEntity', l10n,
-                                 item_or_pair) != 'ignore':
+                                 entity) != 'ignore':
                     obsolete += 1
             else:
                 # entity found in both ref and l10n, check for changed
-                entity = item_or_pair[0]
                 refent = ref[0][ref[1][entity]]
                 l10nent = l10n_entities[l10n_map[entity]]
                 if self.keyRE.search(entity):
                     keys += 1
                 else:
                     if refent.val == l10nent.val:
                         self.doUnchanged(l10nent)
                         unchanged += 1
--- a/compare_locales/tests/test_compare.py
+++ b/compare_locales/tests/test_compare.py
@@ -33,24 +33,20 @@ class TestTree(unittest.TestCase):
                 (1, 'value', {'leaf': 1}),
                 (0, 'key', ('two', 'other')),
                 (1, 'value', {'leaf': 2})
             ]
         )
         self.assertDictEqual(
             tree.toJSON(),
             {
-                'children': [
-                    ('one/entry',
-                     {'value': {'leaf': 1}}
-                     ),
-                    ('two/other',
-                     {'value': {'leaf': 2}}
-                     )
-                ]
+                'one/entry':
+                    {'leaf': 1},
+                'two/other':
+                    {'leaf': 2}
             }
         )
         self.assertMultiLineEqual(
             str(tree),
             '''\
 one/entry
     {'leaf': 1}
 two/other
@@ -70,106 +66,111 @@ two/other
                 (2, 'value', {'leaf': 1}),
                 (1, 'key', ('other',)),
                 (2, 'value', {'leaf': 2})
             ]
         )
         self.assertDictEqual(
             tree.toJSON(),
             {
-                'children': [
-                    ('one', {
-                        'children': [
-                            ('entry',
-                             {'value': {'leaf': 1}}
-                             ),
-                            ('other',
-                             {'value': {'leaf': 2}}
-                             )
-                        ]
-                    })
-                ]
+                'one': {
+                    'entry':
+                        {'leaf': 1},
+                    'other':
+                        {'leaf': 2}
+                }
             }
         )
 
 
 class TestObserver(unittest.TestCase):
     def test_simple(self):
         obs = compare.Observer()
         f = paths.File('/some/real/sub/path', 'sub/path', locale='de')
-        obs.notify('missingEntity', f, ['one', 'two'])
+        obs.notify('missingEntity', f, 'one')
+        obs.notify('missingEntity', f, 'two')
         obs.updateStats(f, {'missing': 15})
         self.assertDictEqual(obs.toJSON(), {
             'summary': {
                 'de': {
                     'missing': 15
                 }
             },
             'details': {
-                'children': [
-                    ('de/sub/path',
-                     {'value': {'missingEntity': [['one', 'two']]}}
-                     )
-                ]
+                'de/sub/path':
+                    [{'missingEntity': 'one'},
+                     {'missingEntity': 'two'}]
             }
         })
         clone = loads(dumps(obs))
         self.assertDictEqual(clone.summary, obs.summary)
         self.assertDictEqual(clone.details.toJSON(), obs.details.toJSON())
         self.assertIsNone(clone.file_stats)
 
     def test_module(self):
         obs = compare.Observer(file_stats=True)
         f = paths.File('/some/real/sub/path', 'path',
                        module='sub', locale='de')
-        obs.notify('missingEntity', f, ['one', 'two'])
+        obs.notify('missingEntity', f, 'one')
+        obs.notify('obsoleteEntity', f, 'bar')
+        obs.notify('missingEntity', f, 'two')
         obs.updateStats(f, {'missing': 15})
         self.assertDictEqual(obs.toJSON(), {
             'summary': {
                 'de': {
                     'missing': 15
                 }
             },
             'details': {
-                'children': [
-                    ('de/sub/path',
-                     {'value': {'missingEntity': [['one', 'two']]}}
-                     )
-                ]
+                'de/sub/path':
+                    [
+                     {'missingEntity': 'one'},
+                     {'obsoleteEntity': 'bar'},
+                     {'missingEntity': 'two'},
+                    ]
             }
         })
         self.assertDictEqual(obs.file_stats, {
             'de': {
                 'sub/path': {
                     'missing': 15
                 }
             }
         })
+        self.assertEqual(obs.serialize(), '''\
+de/sub/path
+    +one
+    -bar
+    +two
+de:
+missing: 15
+0% of entries changed''')
         clone = loads(dumps(obs))
         self.assertDictEqual(clone.summary, obs.summary)
         self.assertDictEqual(clone.details.toJSON(), obs.details.toJSON())
         self.assertDictEqual(clone.file_stats, obs.file_stats)
 
     def test_file_stats(self):
         obs = compare.Observer(file_stats=True)
         f = paths.File('/some/real/sub/path', 'sub/path', locale='de')
-        obs.notify('missingEntity', f, ['one', 'two'])
+        obs.notify('missingEntity', f, 'one')
+        obs.notify('missingEntity', f, 'two')
         obs.updateStats(f, {'missing': 15})
         self.assertDictEqual(obs.toJSON(), {
             'summary': {
                 'de': {
                     'missing': 15
                 }
             },
             'details': {
-                'children': [
-                    ('de/sub/path',
-                     {'value': {'missingEntity': [['one', 'two']]}}
-                     )
-                ]
+                'de/sub/path':
+                    [
+                     {'missingEntity': 'one'},
+                     {'missingEntity': 'two'},
+                    ]
             }
         })
         self.assertDictEqual(obs.file_stats, {
             'de': {
                 'sub/path': {
                     'missing': 15
                 }
             }
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -79,23 +79,21 @@ eff = effVal""")
             {'summary':
                 {None: {
                     'changed': 1,
                     'changed_w': 1,
                     'missing': 2,
                     'missing_w': 2
                 }},
              'details': {
-                 'children': [
-                     ('l10n.properties',
-                         {'value': {'missingEntity': [u'eff', u'foo']}}
-                      )
-                 ]}
-             }
-        )
+                 'l10n.properties': [
+                     {'missingEntity': u'eff'},
+                     {'missingEntity': u'foo'}]
+                }
+             })
         mergefile = mozpath.join(self.tmp, "merge", "l10n.properties")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual(map(lambda e: e.key,  m), ["bar", "eff", "foo"])
 
     def testError(self):
@@ -117,26 +115,22 @@ eff = leffVal
                 {None: {
                     'changed': 2,
                     'changed_w': 3,
                     'errors': 1,
                     'missing': 1,
                     'missing_w': 1
                 }},
              'details': {
-                 'children': [
-                     ('l10n.properties',
-                         {'value': {
-                          'error': [u'argument 1 `S` should be `d` '
-                                    u'at line 1, column 7 for bar'],
-                          'missingEntity': [u'foo']}}
-                      )
-                 ]}
-             }
-        )
+                 'l10n.properties': [
+                     {'error': u'argument 1 `S` should be `d` '
+                               u'at line 1, column 7 for bar'},
+                     {'missingEntity': u'foo'}]
+                }
+             })
         mergefile = mozpath.join(self.tmp, "merge", "l10n.properties")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual([e.key for e in m], ["eff", "foo", "bar"])
         self.assertEqual(m[n['bar']].val, '%d barVal')
 
@@ -158,21 +152,20 @@ eff = leffVal
                 {None: {
                     'changed': 1,
                     'changed_w': 1,
                     'obsolete': 1,
                     'unchanged': 1,
                     'unchanged_w': 1
                 }},
              'details': {
-                 'children': [
-                     ('l10n.properties',
-                         {'value': {'obsoleteEntity': [u'other']}})]},
-             }
-        )
+                 'l10n.properties': [
+                     {'obsoleteEntity': u'other'}]
+                }
+             })
 
 
 class TestDTD(unittest.TestCase, ContentMixin):
     extension = '.dtd'
 
     def setUp(self):
         self.maxDiff = None
         self.tmp = mkdtemp()
@@ -224,23 +217,21 @@ class TestDTD(unittest.TestCase, Content
             {'summary':
                 {None: {
                     'changed': 1,
                     'changed_w': 1,
                     'missing': 2,
                     'missing_w': 2
                 }},
              'details': {
-                 'children': [
-                     ('l10n.dtd',
-                         {'value': {'missingEntity': [u'eff', u'foo']}}
-                      )
-                 ]}
-             }
-        )
+                 'l10n.dtd': [
+                     {'missingEntity': u'eff'},
+                     {'missingEntity': u'foo'}]
+                }
+             })
         mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual(map(lambda e: e.key,  m), ["bar", "eff", "foo"])
 
     def testJunk(self):
@@ -262,28 +253,24 @@ class TestDTD(unittest.TestCase, Content
                 {None: {
                     'errors': 1,
                     'missing': 1,
                     'missing_w': 1,
                     'unchanged': 2,
                     'unchanged_w': 2
                 }},
              'details': {
-                 'children': [
-                     ('l10n.dtd',
-                         {'value': {
-                             'error': [u'Unparsed content "<!ENTY bar '
-                                       u'\'gimmick\'>" '
-                                       u'from line 2 column 1 to '
-                                       u'line 2 column 22'],
-                             'missingEntity': [u'bar']}}
-                      )
-                 ]}
-             }
-        )
+                 'l10n.dtd': [
+                     {'error': u'Unparsed content "<!ENTY bar '
+                               u'\'gimmick\'>" '
+                               u'from line 2 column 1 to '
+                               u'line 2 column 22'},
+                     {'missingEntity': u'bar'}]
+                }
+             })
         mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual(map(lambda e: e.key,  m), ["foo", "eff", "bar"])