bug 1280894, make AddRemove sort stable without map hacks, r=stas
authorAxel Hecht <axel@pike.org>
Thu, 29 Jun 2017 12:14:56 +0200
changeset 286 5e61f6c956815e29bd7ee45cd763b6ea6ce7fb19
parent 285 dd3d1f7841ab309a39e2051e2870f2da393f6b2e
child 287 d1fd0a9d26067b23e7f0aca2aea8b1ab9ffdeceb
push id81
push useraxel@mozilla.com
push dateThu, 29 Jun 2017 13:31:27 +0000
reviewersstas
bugs1280894
bug 1280894, make AddRemove sort stable without map hacks, r=stas The logic to have ordering for AddRemove given externally was a hack, in hindsight. This patch makes AddRemove stable given two iterables, and adds tests to verify that. The test fixes are OK, as the ordering there is for the same position in the localization, and it's kinda ambiguous which should come first or not. This is in support for elmo fluent diffs, 'cause then I can use AddRemove easier there for both keys and attr names. MozReview-Commit-ID: 2NEW4lCxW5M
compare_locales/compare.py
compare_locales/tests/test_compare.py
compare_locales/tests/test_merge.py
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -106,37 +106,48 @@ class Tree(object):
 
         return map(tostr, self.getContent())
 
     def __str__(self):
         return '\n'.join(self.getStrRows())
 
 
 class AddRemove(object):
-    def __init__(self, key=None):
+    def __init__(self):
         self.left = self.right = None
-        self.key = key
 
     def set_left(self, left):
         if not isinstance(left, list):
             left = list(l for l in left)
         self.left = left
 
     def set_right(self, right):
         if not isinstance(right, list):
             right = list(l for l in right)
         self.right = right
 
     def __iter__(self):
-        all_items = self.left[:]
-        all_items.extend(k for k in self.right if k not in self.left)
-        for item in sorted(all_items, key=self.key):
-            if item in self.left and item in self.right:
+        # order_map stores index in left and then index in right
+        order_map = dict((item, (i, -1)) for i, item in enumerate(self.left))
+        left_items = set(order_map)
+        # as we go through the right side, keep track of which left
+        # item we had in right last, and for items not in left,
+        # set the sortmap to (left_offset, right_index)
+        left_offset = -1
+        right_items = set()
+        for i, item in enumerate(self.right):
+            right_items.add(item)
+            if item in order_map:
+                left_offset = order_map[item][0]
+            else:
+                order_map[item] = (left_offset, i)
+        for item in sorted(order_map, key=lambda item: order_map[item]):
+            if item in left_items and item in right_items:
                 yield ('equal', item)
-            elif item in self.left:
+            elif item in left_items:
                 yield ('delete', item)
             else:
                 yield ('add', item)
 
 
 class Observer(object):
 
     def __init__(self, filter=None, file_stats=False):
@@ -318,29 +329,16 @@ class Observer(object):
                         0) / total
             out.append('%d%% of entries changed' % rate)
         return '\n'.join(map(tostr, self.details.getContent()) + out)
 
     def __str__(self):
         return 'observer'
 
 
-class indexof(object):
-    def __init__(self, ref_map):
-        self.ref_map = ref_map
-        self.off = [-1, 0]
-
-    def __call__(self, key):
-        if key in self.ref_map:
-            self.off = [self.ref_map[key], 0]
-        else:
-            self.off[1] += 1
-        return self.off
-
-
 class ContentComparer:
     keyRE = re.compile('[kK]ey')
     nl = re.compile('\n', re.M)
 
     def __init__(self, observers, stat_observers=None):
         '''Create a ContentComparer.
         observer is usually a instance of Observer. The return values
         of the notify method are used to control the handling of missing
@@ -450,29 +448,27 @@ class ContentComparer:
             # no comparison, XXX report?
             return
         try:
             p.readContents(ref_file.getContents())
         except Exception, e:
             self.notify('error', ref_file, str(e))
             return
         ref_entities, ref_map = p.parse()
-        ref_list = sorted(ref_map.keys(), key=lambda k: ref_map[k])
         try:
             p.readContents(l10n.getContents())
             l10n_entities, l10n_map = p.parse()
             l10n_ctx = p.ctx
         except Exception, e:
             self.notify('error', l10n, str(e))
             return
 
-        l10n_list = sorted(l10n_map.keys(), key=lambda k: l10n_map[k])
-        ar = AddRemove(key=indexof(ref_map))
-        ar.set_left(ref_list)
-        ar.set_right(l10n_list)
+        ar = AddRemove()
+        ar.set_left(e.key for e in ref_entities)
+        ar.set_right(e.key for e in l10n_entities)
         report = missing = obsolete = changed = unchanged = keys = 0
         missing_w = changed_w = unchanged_w = 0  # word stats
         missings = []
         skips = []
         checker = getChecker(l10n,
                              reference=ref_entities, extra_tests=extra_tests)
         for msg in p.findDuplicates(ref_entities):
             self.notify('warning', l10n, msg)
--- a/compare_locales/tests/test_compare.py
+++ b/compare_locales/tests/test_compare.py
@@ -174,8 +174,91 @@ 0% of entries changed''')
                     'missing': 15
                 }
             }
         })
         clone = loads(dumps(obs))
         self.assertDictEqual(clone.summary, obs.summary)
         self.assertDictEqual(clone.details.toJSON(), obs.details.toJSON())
         self.assertDictEqual(clone.file_stats, obs.file_stats)
+
+
+class TestAddRemove(unittest.TestCase):
+
+    def _test(self, left, right, ref_actions):
+        ar = compare.AddRemove()
+        ar.set_left(left)
+        ar.set_right(right)
+        actions = list(ar)
+        self.assertListEqual(actions, ref_actions)
+
+    def test_equal(self):
+        self._test(['z', 'a', 'p'], ['z', 'a', 'p'], [
+                ('equal', 'z'),
+                ('equal', 'a'),
+                ('equal', 'p'),
+            ])
+
+    def test_add_start(self):
+        self._test(['a', 'p'], ['z', 'a', 'p'], [
+                ('add', 'z'),
+                ('equal', 'a'),
+                ('equal', 'p'),
+            ])
+
+    def test_add_middle(self):
+        self._test(['z', 'p'], ['z', 'a', 'p'], [
+                ('equal', 'z'),
+                ('add', 'a'),
+                ('equal', 'p'),
+            ])
+
+    def test_add_end(self):
+        self._test(['z', 'a'], ['z', 'a', 'p'], [
+                ('equal', 'z'),
+                ('equal', 'a'),
+                ('add', 'p'),
+            ])
+
+    def test_delete_start(self):
+        self._test(['z', 'a', 'p'], ['a', 'p'], [
+                ('delete', 'z'),
+                ('equal', 'a'),
+                ('equal', 'p'),
+            ])
+
+    def test_delete_middle(self):
+        self._test(['z', 'a', 'p'], ['z', 'p'], [
+                ('equal', 'z'),
+                ('delete', 'a'),
+                ('equal', 'p'),
+            ])
+
+    def test_delete_end(self):
+        self._test(['z', 'a', 'p'], ['z', 'a'], [
+                ('equal', 'z'),
+                ('equal', 'a'),
+                ('delete', 'p'),
+            ])
+
+    def test_replace_start(self):
+        self._test(['b', 'a', 'p'], ['z', 'a', 'p'], [
+                ('add', 'z'),
+                ('delete', 'b'),
+                ('equal', 'a'),
+                ('equal', 'p'),
+            ])
+
+    def test_replace_middle(self):
+        self._test(['z', 'b', 'p'], ['z', 'a', 'p'], [
+                ('equal', 'z'),
+                ('add', 'a'),
+                ('delete', 'b'),
+                ('equal', 'p'),
+            ])
+
+    def test_replace_end(self):
+        self._test(['z', 'a', 'b'], ['z', 'a', 'p'], [
+                ('equal', 'z'),
+                ('equal', 'a'),
+                ('add', 'p'),
+                ('delete', 'b'),
+            ])
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -287,21 +287,21 @@ class TestDTD(unittest.TestCase, Content
                     'errors': 1,
                     'missing': 1,
                     'missing_w': 1,
                     'unchanged': 2,
                     'unchanged_w': 2
                 }},
              'details': {
                  'l10n.dtd': [
-                     {'missingEntity': u'bar'},
                      {'error': u'Unparsed content "<!ENTY bar '
                                u'\'gimmick\'>" '
                                u'from line 2 column 1 to '
-                               u'line 2 column 22'}]
+                               u'line 2 column 22'},
+                     {'missingEntity': u'bar'}]
                 }
              })
         mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
         self.assertEqual(map(lambda e: e.key,  m), ["foo", "eff", "bar"])
@@ -471,27 +471,27 @@ eff = lEff {
                    File(self.l10n, "l10n.ftl", ""),
                    mozpath.join(self.tmp, "merge", "l10n.ftl"))
 
         self.assertDictEqual(
             cc.observers[0].toJSON(),
             {
                 'details': {
                     'l10n.ftl': [
-                        {'missingEntity': u'bar'},
-                        {'missingEntity': u'eff'},
                         {'error': u'Unparsed content "-- Invalid Comment" '
                                   u'from line 1 column 1 '
                                   u'to line 1 column 19'},
                         {'error': u'Unparsed content "bar lBar" '
                                   u'from line 3 column 1 '
                                   u'to line 3 column 9'},
                         {'error': u'Unparsed content "eff = lEff {" '
                                   u'from line 4 column 1 '
                                   u'to line 4 column 13'},
+                        {'missingEntity': u'bar'},
+                        {'missingEntity': u'eff'},
                     ],
                 },
                 'summary': {
                     None: {
                         'changed': 1,
                         'changed_w': 1,
                         'missing': 2,
                         'missing_w': 2,