bug 1368449, data storage and iteration, r=mathjazz
This changes the data storage from a dict like
{
'missingEntity': [
'onekey',
'zoom'
],
'obsoleteEntity': [
'more'
]
}
to a list like
[
{'missingEntity': 'onekey'},
{'obsoleteEntity': 'more'},
{'missingEntity': 'zoom'}
]
With this patch there is only a single location where we
sort, which is going to be good for
bug 1368444.
I had to rip out the use of SequenceMatcher, because that doesn't
preserve the order of the keys: it always issues "delete" first, and
then "add", regardless of whether the added key would come first
or second. And while I was at it, I dropped the whole
construct of yielding the equal key twice. I had done that to mimic
SequenceMatcher, and I don't have to. Makes the code easier
to read, too.
MozReview-Commit-ID: HUWnnBvzyAb
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -3,17 +3,16 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
'Mozilla l10n compare locales tool'
import codecs
import os
import shutil
import re
-from difflib import SequenceMatcher
from collections import defaultdict
try:
from json import dumps
except:
from simplejson import dumps
from compare_locales import parser
@@ -87,77 +86,63 @@ class Tree(object):
for child in self.branches[key].getContent(depth + 1):
yield child
def toJSON(self):
'''
Returns this Tree as a JSON-able tree of hashes.
Only the values need to take care that they're JSON-able.
'''
- json = {}
- keys = self.branches.keys()
- keys.sort()
if self.value is not None:
- json['value'] = self.value
- children = [('/'.join(key), self.branches[key].toJSON())
- for key in keys]
- if children:
- json['children'] = children
- return json
+ return self.value
+ return dict(('/'.join(key), self.branches[key].toJSON())
+ for key in self.branches.keys())
def getStrRows(self):
def tostr(t):
if t[1] == 'key':
return self.indent * t[0] + '/'.join(t[2])
return self.indent * (t[0] + 1) + str(t[2])
return map(tostr, self.getContent())
def __str__(self):
return '\n'.join(self.getStrRows())
-class AddRemove(SequenceMatcher):
- def __init__(self):
- SequenceMatcher.__init__(self, None, None, None)
+class AddRemove(object):
+ def __init__(self, key=None):
+ self.left = self.right = None
+ self.key = key
def set_left(self, left):
- if not isinstance(left, list):
- left = [l for l in left]
- self.set_seq1(left)
+ if not isinstance(left, set):
+ left = set(l for l in left)
+ self.left = left
def set_right(self, right):
- if not isinstance(right, list):
- right = [l for l in right]
- self.set_seq2(right)
+ if not isinstance(right, set):
+ right = set(l for l in right)
+ self.right = right
def __iter__(self):
- for tag, i1, i2, j1, j2 in self.get_opcodes():
- if tag == 'equal':
- for pair in zip(self.a[i1:i2], self.b[j1:j2]):
- yield ('equal', pair)
- elif tag == 'delete':
- for item in self.a[i1:i2]:
- yield ('delete', item)
- elif tag == 'insert':
- for item in self.b[j1:j2]:
- yield ('add', item)
+ for item in sorted(self.left | self.right, key=self.key):
+ if item in self.left and item in self.right:
+ yield ('equal', item)
+ elif item in self.left:
+ yield ('delete', item)
else:
- # tag == 'replace'
- for item in self.a[i1:i2]:
- yield ('delete', item)
- for item in self.b[j1:j2]:
- yield ('add', item)
+ yield ('add', item)
class Observer(object):
def __init__(self, filter=None, file_stats=False):
self.summary = defaultdict(lambda: defaultdict(int))
- self.details = Tree(dict)
+ self.details = Tree(list)
self.filter = filter
self.file_stats = None
if file_stats:
self.file_stats = defaultdict(lambda: defaultdict(dict))
# support pickling
def __getstate__(self):
state = dict(summary=self._dictify(self.summary), details=self.details)
@@ -200,54 +185,41 @@ class Observer(object):
return
for category, value in stats.iteritems():
self.summary[file.locale][category] += value
if self.file_stats is None:
return
if 'missingInFiles' in stats:
# keep track of how many strings are in a missing file
# we got the {'missingFile': 'error'} from the notify pass
- self.details[file]['strings'] = stats['missingInFiles']
+ self.details[file].append({'count': stats['missingInFiles']})
# missingInFiles should just be "missing" in file stats
self.file_stats[file.locale][file.localpath]['missing'] = \
stats['missingInFiles']
return # there are no other stats for missing files
self.file_stats[file.locale][file.localpath].update(stats)
def notify(self, category, file, data):
rv = 'error'
if category in ['missingFile', 'obsoleteFile']:
if self.filter is not None:
rv = self.filter(file)
if rv != "ignore":
- self.details[file][category] = rv
+ self.details[file].append({category: rv})
return rv
if category in ['missingEntity', 'obsoleteEntity']:
if self.filter is not None:
rv = self.filter(file, data)
if rv == "ignore":
return rv
- v = self.details[file]
- try:
- v[category].append(data)
- except KeyError:
- v[category] = [data]
+ self.details[file].append({category: data})
return rv
- if category == 'error':
- try:
- self.details[file][category].append(data)
- except KeyError:
- self.details[file][category] = [data]
- self.summary[file.locale]['errors'] += 1
- elif category == 'warning':
- try:
- self.details[file][category].append(data)
- except KeyError:
- self.details[file][category] = [data]
- self.summary[file.locale]['warnings'] += 1
+ if category in ('error', 'warning'):
+ self.details[file].append({category: data})
+ self.summary[file.locale][category + 's'] += 1
return rv
def toExhibit(self):
items = []
for locale in sorted(self.summary.iterkeys()):
summary = self.summary[locale]
if locale is not None:
item = {'id': 'xxx/' + locale,
@@ -304,36 +276,29 @@ class Observer(object):
if type == "json":
return dumps(self.toJSON())
def tostr(t):
if t[1] == 'key':
return ' ' * t[0] + '/'.join(t[2])
o = []
indent = ' ' * (t[0] + 1)
- if 'error' in t[2]:
- o += [indent + 'ERROR: ' + e for e in t[2]['error']]
- if 'warning' in t[2]:
- o += [indent + 'WARNING: ' + e for e in t[2]['warning']]
- if 'missingEntity' in t[2] or 'obsoleteEntity' in t[2]:
- missingEntities = ('missingEntity' in t[2] and
- t[2]['missingEntity']) or []
- obsoleteEntities = ('obsoleteEntity' in t[2] and
- t[2]['obsoleteEntity']) or []
- entities = missingEntities + obsoleteEntities
- entities.sort()
- for entity in entities:
- op = '+'
- if entity in obsoleteEntities:
- op = '-'
- o.append(indent + op + entity)
- elif 'missingFile' in t[2]:
- o.append(indent + '// add and localize this file')
- elif 'obsoleteFile' in t[2]:
- o.append(indent + '// remove this file')
+ for item in t[2]:
+ if 'error' in item:
+ o += [indent + 'ERROR: ' + item['error']]
+ elif 'warning' in item:
+ o += [indent + 'WARNING: ' + item['warning']]
+ elif 'missingEntity' in item:
+ o += [indent + '+' + item['missingEntity']]
+ elif 'obsoleteEntity' in item:
+ o += [indent + '-' + item['obsoleteEntity']]
+ elif 'missingFile' in item:
+ o.append(indent + '// add and localize this file')
+ elif 'obsoleteFile' in item:
+ o.append(indent + '// remove this file')
return '\n'.join(o)
out = []
for locale, summary in sorted(self.summary.iteritems()):
if locale is not None:
out.append(locale + ':')
out += [k + ': ' + str(v) for k, v in sorted(summary.iteritems())]
total = sum([summary[k]
@@ -478,49 +443,48 @@ class ContentComparer:
ar = AddRemove()
ar.set_left(ref_list)
ar.set_right(l10n_list)
report = missing = obsolete = changed = unchanged = keys = 0
missing_w = changed_w = unchanged_w = 0 # word stats
missings = []
skips = []
checker = getChecker(l10n, reference=ref[0], extra_tests=extra_tests)
- for action, item_or_pair in ar:
+ for action, entity in ar:
if action == 'delete':
# missing entity
- _rv = self.notify('missingEntity', l10n, item_or_pair)
+ _rv = self.notify('missingEntity', l10n, entity)
if _rv == "ignore":
continue
if _rv == "error":
# only add to missing entities for l10n-merge on error,
# not report
- missings.append(item_or_pair)
+ missings.append(entity)
missing += 1
- refent = ref[0][ref[1][item_or_pair]]
+ refent = ref[0][ref[1][entity]]
missing_w += self.countWords(refent.val)
else:
# just report
report += 1
elif action == 'add':
# obsolete entity or junk
- if isinstance(l10n_entities[l10n_map[item_or_pair]],
+ if isinstance(l10n_entities[l10n_map[entity]],
parser.Junk):
- junk = l10n_entities[l10n_map[item_or_pair]]
+ junk = l10n_entities[l10n_map[entity]]
params = (junk.val,) + junk.position() + junk.position(-1)
self.notify('error', l10n,
'Unparsed content "%s" from line %d column %d'
' to line %d column %d' % params)
if self.merge_stage is not None:
skips.append(junk)
elif self.notify('obsoleteEntity', l10n,
- item_or_pair) != 'ignore':
+ entity) != 'ignore':
obsolete += 1
else:
# entity found in both ref and l10n, check for changed
- entity = item_or_pair[0]
refent = ref[0][ref[1][entity]]
l10nent = l10n_entities[l10n_map[entity]]
if self.keyRE.search(entity):
keys += 1
else:
if refent.val == l10nent.val:
self.doUnchanged(l10nent)
unchanged += 1
--- a/compare_locales/tests/test_compare.py
+++ b/compare_locales/tests/test_compare.py
@@ -33,24 +33,20 @@ class TestTree(unittest.TestCase):
(1, 'value', {'leaf': 1}),
(0, 'key', ('two', 'other')),
(1, 'value', {'leaf': 2})
]
)
self.assertDictEqual(
tree.toJSON(),
{
- 'children': [
- ('one/entry',
- {'value': {'leaf': 1}}
- ),
- ('two/other',
- {'value': {'leaf': 2}}
- )
- ]
+ 'one/entry':
+ {'leaf': 1},
+ 'two/other':
+ {'leaf': 2}
}
)
self.assertMultiLineEqual(
str(tree),
'''\
one/entry
{'leaf': 1}
two/other
@@ -70,106 +66,111 @@ two/other
(2, 'value', {'leaf': 1}),
(1, 'key', ('other',)),
(2, 'value', {'leaf': 2})
]
)
self.assertDictEqual(
tree.toJSON(),
{
- 'children': [
- ('one', {
- 'children': [
- ('entry',
- {'value': {'leaf': 1}}
- ),
- ('other',
- {'value': {'leaf': 2}}
- )
- ]
- })
- ]
+ 'one': {
+ 'entry':
+ {'leaf': 1},
+ 'other':
+ {'leaf': 2}
+ }
}
)
class TestObserver(unittest.TestCase):
def test_simple(self):
obs = compare.Observer()
f = paths.File('/some/real/sub/path', 'sub/path', locale='de')
- obs.notify('missingEntity', f, ['one', 'two'])
+ obs.notify('missingEntity', f, 'one')
+ obs.notify('missingEntity', f, 'two')
obs.updateStats(f, {'missing': 15})
self.assertDictEqual(obs.toJSON(), {
'summary': {
'de': {
'missing': 15
}
},
'details': {
- 'children': [
- ('de/sub/path',
- {'value': {'missingEntity': [['one', 'two']]}}
- )
- ]
+ 'de/sub/path':
+ [{'missingEntity': 'one'},
+ {'missingEntity': 'two'}]
}
})
clone = loads(dumps(obs))
self.assertDictEqual(clone.summary, obs.summary)
self.assertDictEqual(clone.details.toJSON(), obs.details.toJSON())
self.assertIsNone(clone.file_stats)
def test_module(self):
obs = compare.Observer(file_stats=True)
f = paths.File('/some/real/sub/path', 'path',
module='sub', locale='de')
- obs.notify('missingEntity', f, ['one', 'two'])
+ obs.notify('missingEntity', f, 'one')
+ obs.notify('obsoleteEntity', f, 'bar')
+ obs.notify('missingEntity', f, 'two')
obs.updateStats(f, {'missing': 15})
self.assertDictEqual(obs.toJSON(), {
'summary': {
'de': {
'missing': 15
}
},
'details': {
- 'children': [
- ('de/sub/path',
- {'value': {'missingEntity': [['one', 'two']]}}
- )
- ]
+ 'de/sub/path':
+ [
+ {'missingEntity': 'one'},
+ {'obsoleteEntity': 'bar'},
+ {'missingEntity': 'two'},
+ ]
}
})
self.assertDictEqual(obs.file_stats, {
'de': {
'sub/path': {
'missing': 15
}
}
})
+ self.assertEqual(obs.serialize(), '''\
+de/sub/path
+ +one
+ -bar
+ +two
+de:
+missing: 15
+0% of entries changed''')
clone = loads(dumps(obs))
self.assertDictEqual(clone.summary, obs.summary)
self.assertDictEqual(clone.details.toJSON(), obs.details.toJSON())
self.assertDictEqual(clone.file_stats, obs.file_stats)
def test_file_stats(self):
obs = compare.Observer(file_stats=True)
f = paths.File('/some/real/sub/path', 'sub/path', locale='de')
- obs.notify('missingEntity', f, ['one', 'two'])
+ obs.notify('missingEntity', f, 'one')
+ obs.notify('missingEntity', f, 'two')
obs.updateStats(f, {'missing': 15})
self.assertDictEqual(obs.toJSON(), {
'summary': {
'de': {
'missing': 15
}
},
'details': {
- 'children': [
- ('de/sub/path',
- {'value': {'missingEntity': [['one', 'two']]}}
- )
- ]
+ 'de/sub/path':
+ [
+ {'missingEntity': 'one'},
+ {'missingEntity': 'two'},
+ ]
}
})
self.assertDictEqual(obs.file_stats, {
'de': {
'sub/path': {
'missing': 15
}
}
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -79,23 +79,21 @@ eff = effVal""")
{'summary':
{None: {
'changed': 1,
'changed_w': 1,
'missing': 2,
'missing_w': 2
}},
'details': {
- 'children': [
- ('l10n.properties',
- {'value': {'missingEntity': [u'eff', u'foo']}}
- )
- ]}
- }
- )
+ 'l10n.properties': [
+ {'missingEntity': u'eff'},
+ {'missingEntity': u'foo'}]
+ }
+ })
mergefile = mozpath.join(self.tmp, "merge", "l10n.properties")
self.assertTrue(os.path.isfile(mergefile))
p = getParser(mergefile)
p.readFile(mergefile)
[m, n] = p.parse()
self.assertEqual(map(lambda e: e.key, m), ["bar", "eff", "foo"])
def testError(self):
@@ -117,26 +115,22 @@ eff = leffVal
{None: {
'changed': 2,
'changed_w': 3,
'errors': 1,
'missing': 1,
'missing_w': 1
}},
'details': {
- 'children': [
- ('l10n.properties',
- {'value': {
- 'error': [u'argument 1 `S` should be `d` '
- u'at line 1, column 7 for bar'],
- 'missingEntity': [u'foo']}}
- )
- ]}
- }
- )
+ 'l10n.properties': [
+ {'error': u'argument 1 `S` should be `d` '
+ u'at line 1, column 7 for bar'},
+ {'missingEntity': u'foo'}]
+ }
+ })
mergefile = mozpath.join(self.tmp, "merge", "l10n.properties")
self.assertTrue(os.path.isfile(mergefile))
p = getParser(mergefile)
p.readFile(mergefile)
[m, n] = p.parse()
self.assertEqual([e.key for e in m], ["eff", "foo", "bar"])
self.assertEqual(m[n['bar']].val, '%d barVal')
@@ -158,21 +152,20 @@ eff = leffVal
{None: {
'changed': 1,
'changed_w': 1,
'obsolete': 1,
'unchanged': 1,
'unchanged_w': 1
}},
'details': {
- 'children': [
- ('l10n.properties',
- {'value': {'obsoleteEntity': [u'other']}})]},
- }
- )
+ 'l10n.properties': [
+ {'obsoleteEntity': u'other'}]
+ }
+ })
class TestDTD(unittest.TestCase, ContentMixin):
extension = '.dtd'
def setUp(self):
self.maxDiff = None
self.tmp = mkdtemp()
@@ -224,23 +217,21 @@ class TestDTD(unittest.TestCase, Content
{'summary':
{None: {
'changed': 1,
'changed_w': 1,
'missing': 2,
'missing_w': 2
}},
'details': {
- 'children': [
- ('l10n.dtd',
- {'value': {'missingEntity': [u'eff', u'foo']}}
- )
- ]}
- }
- )
+ 'l10n.dtd': [
+ {'missingEntity': u'eff'},
+ {'missingEntity': u'foo'}]
+ }
+ })
mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
self.assertTrue(os.path.isfile(mergefile))
p = getParser(mergefile)
p.readFile(mergefile)
[m, n] = p.parse()
self.assertEqual(map(lambda e: e.key, m), ["bar", "eff", "foo"])
def testJunk(self):
@@ -262,28 +253,24 @@ class TestDTD(unittest.TestCase, Content
{None: {
'errors': 1,
'missing': 1,
'missing_w': 1,
'unchanged': 2,
'unchanged_w': 2
}},
'details': {
- 'children': [
- ('l10n.dtd',
- {'value': {
- 'error': [u'Unparsed content "<!ENTY bar '
- u'\'gimmick\'>" '
- u'from line 2 column 1 to '
- u'line 2 column 22'],
- 'missingEntity': [u'bar']}}
- )
- ]}
- }
- )
+ 'l10n.dtd': [
+ {'error': u'Unparsed content "<!ENTY bar '
+ u'\'gimmick\'>" '
+ u'from line 2 column 1 to '
+ u'line 2 column 22'},
+ {'missingEntity': u'bar'}]
+ }
+ })
mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
self.assertTrue(os.path.isfile(mergefile))
p = getParser(mergefile)
p.readFile(mergefile)
[m, n] = p.parse()
self.assertEqual(map(lambda e: e.key, m), ["foo", "eff", "bar"])