Bug 1199670 - Add Fluent support to compare-locales. r=Pike
author: Staś Małolepszy <stas@mozilla.com>
Thu, 22 Jun 2017 15:05:03 +0200
changeset 277 c0aa94185f560f909677e8cb11a4ff8e776d1f76
parent 271 4a0b255d87cd4338299285cf08fba7a97a7271e5
child 278 868e29f6439c679b15972930e47f90ece2e4cf65
push id: 77
push user: smalolepszy@mozilla.com
push date: Tue, 27 Jun 2017 17:11:36 +0000
reviewers: Pike
bugs: 1199670
Bug 1199670 - Add Fluent support to compare-locales. r=Pike MozReview-Commit-ID: DsTC6KDFGpT
.gitignore
.hgignore
compare_locales/checks.py
compare_locales/compare.py
compare_locales/parser.py
compare_locales/tests/test_compare.py
compare_locales/tests/test_dtd.py
compare_locales/tests/test_ftl.py
compare_locales/tests/test_merge.py
setup.py
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,9 @@
 *.orig
 *.pyc
 build/
 dist/
 compare_locales.egg-info/
+.eggs/
 .tox/
 .coverage
 htmlcov/
--- a/.hgignore
+++ b/.hgignore
@@ -1,8 +1,9 @@
 \.orig$
 \.pyc$
 ^build$
 ^dist$
 ^compare_locales.egg-info$
+^.eggs$
 ^.tox$
 ^.coverage$
 ^htmlcov$
--- a/compare_locales/checks.py
+++ b/compare_locales/checks.py
@@ -417,14 +417,61 @@ class DTDChecker(Checker):
                           u"or \\u0022, or put string in apostrophes."
                 else:
                     msg = u"Apostrophes in Android DTDs need escaping with "\
                           u"\\' or \\u0027, or use \u2019, or put string in "\
                           u"quotes."
                 yield ('error', m.end(0)+offset, msg, 'android')
 
 
+class FluentChecker(Checker):
+    '''Tests to run on Fluent (FTL) files.
+    '''
+    pattern = re.compile('.*\.ftl')
+
+    # Positions yielded by FluentChecker.check are absolute offsets from the
+    # beginning of the file.  This is different from the base Checker behavior
+    # which yields offsets from the beginning of the current entity's value.
+    def check(self, refEnt, l10nEnt):
+        ref_entry = refEnt.entry
+        l10n_entry = l10nEnt.entry
+        # verify that values match, either both have a value or none
+        if ref_entry.value is not None and l10n_entry.value is None:
+            yield ('error', l10n_entry.span.start,
+                   'Missing value', 'fluent')
+        if ref_entry.value is None and l10n_entry.value is not None:
+            yield ('error', l10n_entry.value.span.start,
+                   'Obsolete value', 'fluent')
+
+        # verify that we're having the same set of attributes
+        ref_attr_names = set((attr.id.name for attr in ref_entry.attributes))
+        ref_pos = dict((attr.id.name, i)
+                       for i, attr in enumerate(ref_entry.attributes))
+        l10n_attr_names = set((attr.id.name for attr in l10n_entry.attributes))
+        l10n_pos = dict((attr.id.name, i)
+                        for i, attr in enumerate(l10n_entry.attributes))
+
+        missing_attr_names = sorted(ref_attr_names - l10n_attr_names,
+                                    key=lambda k: ref_pos[k])
+        for attr_name in missing_attr_names:
+            yield ('error', l10n_entry.span.start,
+                   'Missing attribute: ' + attr_name, 'fluent')
+
+        obsolete_attr_names = sorted(l10n_attr_names - ref_attr_names,
+                                     key=lambda k: l10n_pos[k])
+        obsolete_attrs = [
+            attr
+            for attr in l10n_entry.attributes
+            if attr.id.name in obsolete_attr_names
+        ]
+        for attr in obsolete_attrs:
+            yield ('error', attr.span.start,
+                   'Obsolete attribute: ' + attr.id.name, 'fluent')
+
+
 def getChecker(file, reference=None, extra_tests=None):
     if PropertiesChecker.use(file):
         return PropertiesChecker(extra_tests)
     if DTDChecker.use(file):
         return DTDChecker(extra_tests, reference)
+    if FluentChecker.use(file):
+        return FluentChecker(extra_tests)
     return None
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -346,53 +346,77 @@ class ContentComparer:
         of the notify method are used to control the handling of missing
         entities.
         '''
         self.observers = observers
         if stat_observers is None:
             stat_observers = []
         self.stat_observers = stat_observers
 
-    def merge(self, ref_entities, ref_map, ref_file, l10n_file, merge_file,
-              missing, skips, ctx, canMerge, encoding):
+    def create_merge_dir(self, merge_file):
         outdir = mozpath.dirname(merge_file)
         if not os.path.isdir(outdir):
             os.makedirs(outdir)
-        if not canMerge:
+
+    def merge(self, ref_entities, ref_map, ref_file, l10n_file, merge_file,
+              missing, skips, ctx, capabilities, encoding):
+
+        if capabilities == parser.CAN_NONE:
+            return
+
+        if capabilities & parser.CAN_COPY and (skips or missing):
+            self.create_merge_dir(merge_file)
             shutil.copyfile(ref_file.fullpath, merge_file)
             print "copied reference to " + merge_file
             return
+
+        if not (capabilities & parser.CAN_SKIP):
+            return
+
+        # Start with None in case the merge file doesn't need to be created.
+        f = None
+
         if skips:
             # skips come in ordered by key name, we need them in file order
             skips.sort(key=lambda s: s.span[0])
-        trailing = (['\n'] +
-                    [ref_entities[ref_map[key]].all for key in missing] +
-                    [ref_entities[ref_map[skip.key]].all for skip in skips
-                     if not isinstance(skip, parser.Junk)])
-        if skips:
-            # we need to skip a few errornous blocks in the input, copy by hand
+
+            # we need to skip a few erroneous blocks in the input, copy by hand
+            self.create_merge_dir(merge_file)
             f = codecs.open(merge_file, 'wb', encoding)
             offset = 0
             for skip in skips:
                 chunk = skip.span
                 f.write(ctx.contents[offset:chunk[0]])
                 offset = chunk[1]
             f.write(ctx.contents[offset:])
-        else:
-            shutil.copyfile(l10n_file.fullpath, merge_file)
-            f = codecs.open(merge_file, 'ab', encoding)
-        print "adding to " + merge_file
+
+        if not (capabilities & parser.CAN_MERGE):
+            return
+
+        if skips or missing:
+            if f is None:
+                self.create_merge_dir(merge_file)
+                shutil.copyfile(l10n_file.fullpath, merge_file)
+                f = codecs.open(merge_file, 'ab', encoding)
 
-        def ensureNewline(s):
-            if not s.endswith('\n'):
-                return s + '\n'
-            return s
+            trailing = (['\n'] +
+                        [ref_entities[ref_map[key]].all for key in missing] +
+                        [ref_entities[ref_map[skip.key]].all for skip in skips
+                         if not isinstance(skip, parser.Junk)])
 
-        f.write(''.join(map(ensureNewline, trailing)))
-        f.close()
+            def ensureNewline(s):
+                if not s.endswith('\n'):
+                    return s + '\n'
+                return s
+
+            print "adding to " + merge_file
+            f.write(''.join(map(ensureNewline, trailing)))
+
+        if f is not None:
+            f.close()
 
     def notify(self, category, file, data):
         """Check observer for the found data, and if it's
         not to ignore, notify stat_observers.
         """
         rvs = set(
             observer.notify(category, file, data)
             for observer in self.observers
@@ -410,27 +434,16 @@ class ContentComparer:
 
     def updateStats(self, file, stats):
         """Check observer for the found data, and if it's
         not to ignore, notify stat_observers.
         """
         for observer in self.observers + self.stat_observers:
             observer.updateStats(file, stats)
 
-    br = re.compile('<br\s*/?>', re.U)
-    sgml = re.compile('</?\w+.*?>', re.U | re.M)
-
-    def countWords(self, value):
-        """Count the words in an English string.
-        Replace a couple of xml markup to make that safer, too.
-        """
-        value = self.br.sub(u'\n', value)
-        value = self.sgml.sub(u'', value)
-        return len(value.split())
-
     def remove(self, obsolete):
         self.notify('obsoleteFile', obsolete, None)
         pass
 
     def compare(self, ref_file, l10n, merge_file, extra_tests=None):
         try:
             p = parser.getParser(ref_file.file)
         except UserWarning:
@@ -470,17 +483,17 @@ class ContentComparer:
                 if _rv == "ignore":
                     continue
                 if _rv == "error":
                     # only add to missing entities for l10n-merge on error,
                     # not report
                     missings.append(entity)
                     missing += 1
                     refent = ref[0][ref[1][entity]]
-                    missing_w += self.countWords(refent.val)
+                    missing_w += refent.count_words()
                 else:
                     # just report
                     report += 1
             elif action == 'add':
                 # obsolete entity or junk
                 if isinstance(l10n_entities[l10n_map[entity]],
                               parser.Junk):
                     junk = l10n_entities[l10n_map[entity]]
@@ -495,51 +508,42 @@ class ContentComparer:
                     obsolete += 1
             else:
                 # entity found in both ref and l10n, check for changed
                 refent = ref[0][ref[1][entity]]
                 l10nent = l10n_entities[l10n_map[entity]]
                 if self.keyRE.search(entity):
                     keys += 1
                 else:
-                    if refent.val == l10nent.val:
+                    if refent == l10nent:
                         self.doUnchanged(l10nent)
                         unchanged += 1
-                        unchanged_w += self.countWords(refent.val)
+                        unchanged_w += refent.count_words()
                     else:
                         self.doChanged(ref_file, refent, l10nent)
                         changed += 1
-                        changed_w += self.countWords(refent.val)
+                        changed_w += refent.count_words()
                         # run checks:
                 if checker:
                     for tp, pos, msg, cat in checker.check(refent, l10nent):
-                        # compute real src position, if first line,
-                        # col needs adjustment
-                        if isinstance(pos, tuple):
-                            _l, col = l10nent.value_position()
-                            # line, column
-                            if pos[0] == 1:
-                                col = col + pos[1]
-                            else:
-                                col = pos[1]
-                                _l += pos[0] - 1
-                        else:
-                            _l, col = l10nent.value_position(pos)
+                        line, col = l10nent.value_position(pos)
                         # skip error entities when merging
                         if tp == 'error' and merge_file is not None:
                             skips.append(l10nent)
                         self.notify(tp, l10n,
                                     u"%s at line %d, column %d for %s" %
-                                    (msg, _l, col, refent.key))
+                                    (msg, line, col, refent.key))
                 pass
-        if merge_file is not None and (missings or skips):
+
+        if merge_file is not None:
             self.merge(
                 ref[0], ref[1], ref_file,
                 l10n, merge_file, missings, skips, l10n_ctx,
-                p.canMerge, p.encoding)
+                p.capabilities, p.encoding)
+
         stats = {}
         for cat, value in (
                 ('missing', missing),
                 ('missing_w', missing_w),
                 ('report', report),
                 ('obsolete', obsolete),
                 ('changed', changed),
                 ('changed_w', changed_w),
@@ -566,17 +570,17 @@ class ContentComparer:
         except Exception, ex:
             self.notify('error', f, str(ex))
             return
         # strip parse errors
         entities = [e for e in entities if not isinstance(e, parser.Junk)]
         self.updateStats(missing, {'missingInFiles': len(entities)})
         missing_w = 0
         for e in entities:
-            missing_w += self.countWords(e.val)
+            missing_w += e.count_words()
         self.updateStats(missing, {'missing_w': missing_w})
 
     def doUnchanged(self, entity):
         # overload this if needed
         pass
 
     def doChanged(self, file, ref_entity, l10n_entity):
         # overload this if needed
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -2,19 +2,35 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import re
 import bisect
 import codecs
 import logging
 
+from fluent.syntax import FluentParser as FTLParser
+from fluent.syntax import ast as ftl
+
 __constructors = []
 
 
+# The allowed capabilities for the Parsers.  They define the exact strategy
+# used by ContentComparer.merge.
+
+# Don't perform any merging
+CAN_NONE = 0
+# Copy the entire reference file
+CAN_COPY = 1
+# Remove broken entities from localization
+CAN_SKIP = 2
+# Add missing and broken entities from the reference to localization
+CAN_MERGE = 4
+
+
 class EntityBase(object):
     '''
     Abstraction layer for a localizable entity.
     Currently supported are grammars of the form:
 
     1: pre white space
     2: entity definition
     3: entity key (name)
@@ -94,19 +110,45 @@ class EntityBase(object):
     key = property(get_key)
     val = property(get_val)
     raw_val = property(get_raw_val)
     post = property(get_post)
 
     def __repr__(self):
         return self.key
 
+    re_br = re.compile('<br\s*/?>', re.U)
+    re_sgml = re.compile('</?\w+.*?>', re.U | re.M)
+
+    def count_words(self):
+        """Count the words in an English string.
+        Replace a couple of xml markup to make that safer, too.
+        """
+        value = self.re_br.sub(u'\n', self.val)
+        value = self.re_sgml.sub(u'', value)
+        return len(value.split())
+
+    def __eq__(self, other):
+        return self.key == other.key and self.val == other.val
+
 
 class Entity(EntityBase):
-    pass
+    def value_position(self, offset=0):
+        # DTDChecker already returns tuples of (line, col) positions
+        if isinstance(offset, tuple):
+            line_pos, col_pos = offset
+            line, col = super(Entity, self).value_position()
+            if line_pos == 1:
+                col = col + col_pos
+            else:
+                col = col_pos
+                line += line_pos - 1
+            return line, col
+        else:
+            return super(Entity, self).value_position(offset)
 
 
 class Comment(EntityBase):
     def __init__(self, ctx, span, pre_ws_span, def_span,
                  post_span):
         self.ctx = ctx
         self.span = span
         self.pre_ws_span = pre_ws_span
@@ -175,18 +217,18 @@ class Whitespace(EntityBase):
         self.def_span = self.pre_ws_span = (span[0], span[0])
         self.post_span = (span[1], span[1])
         self.pp = lambda v: v
 
     def __repr__(self):
         return self.raw_val
 
 
-class Parser:
-    canMerge = True
+class Parser(object):
+    capabilities = CAN_SKIP | CAN_MERGE
     tail = re.compile('\s+\Z')
 
     class Context(object):
         "Fixture for content and line numbers"
         def __init__(self, contents):
             self.contents = contents
             self._lines = None
 
@@ -463,17 +505,17 @@ class DefinesInstruction(EntityBase):
         self.pp = lambda v: v
 
     def __repr__(self):
         return self.raw_val
 
 
 class DefinesParser(Parser):
     # can't merge, #unfilter needs to be the last item, which we don't support
-    canMerge = False
+    capabilities = CAN_COPY
     tail = re.compile(r'(?!)')  # never match
 
     def __init__(self):
         self.reComment = re.compile(
             '((?:[ \t]*\n)*)'
             '((?:^# .*?(?:\n|\Z))+)'
             '((?:[ \t]*(?:\n|\Z))*)', re.M)
         self.reKey = re.compile('((?:[ \t]*\n)*)'
@@ -561,12 +603,96 @@ class IniParser(Parser):
         m = self.reKey.match(contents, offset)
         if m:
             offset = m.end()
             return (self.createEntity(ctx, m), offset)
         return self.getTrailing(ctx, offset,
                                 self.reComment, self.reSection, self.reKey)
 
 
+class FluentEntity(Entity):
+    # Fields ignored when comparing two entities.
+    ignored_fields = ['comment', 'span']
+
+    def __init__(self, ctx, entry):
+        start = entry.span.start
+        end = entry.span.end
+
+        self.ctx = ctx
+        self.span = (start, end)
+
+        self.key_span = (entry.id.span.start, entry.id.span.end)
+
+        if entry.value is not None:
+            self.val_span = (entry.value.span.start, entry.value.span.end)
+        else:
+            self.val_span = (0, 0)
+
+        self.entry = entry
+
+    def pp(self, value):
+        # XXX Normalize whitespace?
+        return value
+
+    _word_count = None
+
+    def count_words(self):
+        if self._word_count is None:
+            self._word_count = 0
+
+            def count_words(node):
+                if isinstance(node, ftl.TextElement):
+                    self._word_count += len(node.value.split())
+                return node
+
+            self.entry.traverse(count_words)
+
+        return self._word_count
+
+    def __eq__(self, other):
+        return self.entry.equals(
+            other.entry, ignored_fields=self.ignored_fields)
+
+    # Positions yielded by FluentChecker.check are absolute offsets from the
+    # beginning of the file.  This is different from the base Checker behavior
+    # which yields offsets from the beginning of the current entity's value.
+    def position(self, pos=None):
+        if pos is None:
+            pos = self.entry.span.start
+        return self.ctx.lines(pos)[0]
+
+    # FluentEntities don't differentiate between entity and value positions
+    # because all positions are absolute from the beginning of the file.
+    def value_position(self, pos=None):
+        return self.position(pos)
+
+
+class FluentParser(Parser):
+    capabilities = CAN_SKIP
+
+    def __init__(self):
+        super(FluentParser, self).__init__()
+        self.ftl_parser = FTLParser()
+
+    def walk(self, onlyEntities=False):
+        if not self.ctx:
+            # loading file failed, or we just didn't load anything
+            return
+        resource = self.ftl_parser.parse(self.ctx.contents)
+        for entry in resource.body:
+            if isinstance(entry, ftl.Message):
+                yield FluentEntity(self.ctx, entry)
+            elif isinstance(entry, ftl.Junk):
+                start = entry.span.start
+                end = entry.span.end
+                # strip leading whitespace
+                start += re.match('\s*', entry.content).end()
+                # strip trailing whitespace
+                ws, we = re.search('\s*$', entry.content).span()
+                end -= we - ws
+                yield Junk(self.ctx, (start, end))
+
+
 __constructors = [('\\.dtd$', DTDParser()),
                   ('\\.properties$', PropertiesParser()),
                   ('\\.ini$', IniParser()),
-                  ('\\.inc$', DefinesParser())]
+                  ('\\.inc$', DefinesParser()),
+                  ('\\.ftl$', FluentParser())]
--- a/compare_locales/tests/test_compare.py
+++ b/compare_locales/tests/test_compare.py
@@ -174,17 +174,8 @@ 0% of entries changed''')
                     'missing': 15
                 }
             }
         })
         clone = loads(dumps(obs))
         self.assertDictEqual(clone.summary, obs.summary)
         self.assertDictEqual(clone.details.toJSON(), obs.details.toJSON())
         self.assertDictEqual(clone.file_stats, obs.file_stats)
-
-
-class TestContentComparer(unittest.TestCase):
-    def test_word_count(self):
-        cc = compare.ContentComparer([])
-        self.assertEqual(cc.countWords('one'), 1)
-        self.assertEqual(cc.countWords('one<br>two'), 2)
-        self.assertEqual(cc.countWords('one<span>word</span>'), 1)
-        self.assertEqual(cc.countWords('one <a href="foo">two</a> three'), 3)
--- a/compare_locales/tests/test_dtd.py
+++ b/compare_locales/tests/test_dtd.py
@@ -127,11 +127,24 @@ escaped value">
     def test_post(self):
         self.parser.readContents('<!ENTITY a "a"><!ENTITY b "b">')
         a, b = list(self.parser)
         self.assertEqual(a.post, '')
         self.parser.readContents('<!ENTITY a "a"> <!ENTITY b "b">')
         a, b = list(self.parser)
         self.assertEqual(a.post, ' ')
 
+    def test_word_count(self):
+        self.parser.readContents('''\
+<!ENTITY a "one">
+<!ENTITY b "one<br>two">
+<!ENTITY c "one<span>word</span>">
+<!ENTITY d "one <a href='foo'>two</a> three">
+''')
+        a, b, c, d = list(self.parser)
+        self.assertEqual(a.count_words(), 1)
+        self.assertEqual(b.count_words(), 2)
+        self.assertEqual(c.count_words(), 1)
+        self.assertEqual(d.count_words(), 3)
+
 
 if __name__ == '__main__':
     unittest.main()
new file mode 100644
--- /dev/null
+++ b/compare_locales/tests/test_ftl.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales.tests import ParserTestMixin
+
+
+class TestFluentParser(ParserTestMixin, unittest.TestCase):
+    maxDiff = None
+    filename = 'foo.ftl'
+
+    def test_equality_same(self):
+        source = 'progress = Progress: { NUMBER($num, style: "percent") }.'
+
+        self.parser.readContents(source)
+        [ent1] = list(self.parser)
+
+        self.parser.readContents(source)
+        [ent2] = list(self.parser)
+
+        self.assertEqual(ent1, ent2)
+
+    def test_equality_different_whitespace(self):
+        source1 = 'foo = { $arg }'
+        source2 = 'foo = {    $arg    }'
+
+        self.parser.readContents(source1)
+        [ent1] = list(self.parser)
+
+        self.parser.readContents(source2)
+        [ent2] = list(self.parser)
+
+        self.assertEqual(ent1, ent2)
+
+    def test_word_count(self):
+        self.parser.readContents('''\
+a = One
+b = One two three
+c = One { $arg } two
+d =
+    One { $arg ->
+       *[x] Two three
+        [y] Four
+    } five.
+e
+    .attr = One
+f
+    .attr1 = One
+    .attr2 = Two
+g = One two
+    .attr = Three
+h =
+    One { $arg ->
+       *[x] Two three
+        [y] Four
+    } five.
+    .attr1 =
+        Six { $arg ->
+           *[x] Seven eight
+            [y] Nine
+        } ten.
+''')
+
+        a, b, c, d, e, f, g, h = list(self.parser)
+        self.assertEqual(a.count_words(), 1)
+        self.assertEqual(b.count_words(), 3)
+        self.assertEqual(c.count_words(), 2)
+        self.assertEqual(d.count_words(), 5)
+        self.assertEqual(e.count_words(), 1)
+        self.assertEqual(f.count_words(), 2)
+        self.assertEqual(g.count_words(), 3)
+        self.assertEqual(h.count_words(), 10)
+
+    def test_simple_message(self):
+        self.parser.readContents('a = A')
+
+        [a] = list(self.parser)
+        self.assertEqual(a.key, 'a')
+        self.assertEqual(a.val, 'A')
+        self.assertEqual(a.all, 'a = A')
+
+    def test_complex_message(self):
+        self.parser.readContents('abc = A { $arg } B { msg } C')
+
+        [abc] = list(self.parser)
+        self.assertEqual(abc.key, 'abc')
+        self.assertEqual(abc.val, 'A { $arg } B { msg } C')
+        self.assertEqual(abc.all, 'abc = A { $arg } B { msg } C')
+
+    def test_multiline_message(self):
+        self.parser.readContents('''\
+abc =
+    A
+    B
+    C
+''')
+
+        [abc] = list(self.parser)
+        self.assertEqual(abc.key, 'abc')
+        self.assertEqual(abc.val, '\n    A\n    B\n    C')
+        self.assertEqual(abc.all, 'abc =\n    A\n    B\n    C')
+
+    def test_message_with_attribute(self):
+        self.parser.readContents('''\
+abc = ABC
+    .attr = Attr
+''')
+
+        [abc] = list(self.parser)
+        self.assertEqual(abc.key, 'abc')
+        self.assertEqual(abc.val, 'ABC')
+        self.assertEqual(abc.all, 'abc = ABC\n    .attr = Attr')
+
+    def test_message_with_attribute_and_no_value(self):
+        self.parser.readContents('''\
+abc
+    .attr = Attr
+''')
+
+        [abc] = list(self.parser)
+        self.assertEqual(abc.key, 'abc')
+        self.assertEqual(abc.val, '')
+        self.assertEqual(abc.all, 'abc\n    .attr = Attr')
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -325,10 +325,370 @@ class TestDTD(unittest.TestCase, Content
              'details': {
                  'l10n.dtd': [
                      {'warning': u"can't parse en-US value at line 1, "
                                  u"column 0 for bar"}]
                 }
              })
 
 
+class TestFluent(unittest.TestCase):
+    maxDiff = None  # we got big dictionaries to compare
+
+    def reference(self, content):
+        self.ref = os.path.join(self.tmp, "en-reference.ftl")
+        open(self.ref, "w").write(content)
+
+    def localized(self, content):
+        self.l10n = os.path.join(self.tmp, "l10n.ftl")
+        open(self.l10n, "w").write(content)
+
+    def setUp(self):
+        self.tmp = mkdtemp()
+        os.mkdir(os.path.join(self.tmp, "merge"))
+        self.ref = self.l10n = None
+
+    def tearDown(self):
+        shutil.rmtree(self.tmp)
+        del self.tmp
+        del self.ref
+        del self.l10n
+
+    def testGood(self):
+        self.reference("""\
+foo = fooVal
+bar = barVal
+eff = effVal
+""")
+        self.localized("""\
+foo = lFoo
+bar = lBar
+eff = lEff
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {'summary':
+                {None: {
+                    'changed': 3,
+                    'changed_w': 3
+                }},
+             'details': {}
+             }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(not os.path.exists(mergepath))
+
+    def testMissing(self):
+        self.reference("""\
+foo = fooVal
+bar = barVal
+eff = effVal
+""")
+        self.localized("""\
+foo = lFoo
+eff = lEff
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {
+                    'l10n.ftl': [
+                        {'missingEntity': u'bar'}
+                    ],
+                },
+                'summary': {
+                    None: {
+                        'changed': 2,
+                        'changed_w': 2,
+                        'missing': 1,
+                        'missing_w': 1
+                    }
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(not os.path.exists(mergepath))
+
+    def testBroken(self):
+        self.reference("""\
+foo = fooVal
+bar = barVal
+eff = effVal
+""")
+        self.localized("""\
+-- Invalid Comment
+foo = lFoo
+bar lBar
+eff = lEff {
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {
+                    'l10n.ftl': [
+                        {'missingEntity': u'bar'},
+                        {'missingEntity': u'eff'},
+                        {'error': u'Unparsed content "-- Invalid Comment" '
+                                  u'from line 1 column 1 '
+                                  u'to line 1 column 19'},
+                        {'error': u'Unparsed content "bar lBar" '
+                                  u'from line 3 column 1 '
+                                  u'to line 3 column 9'},
+                        {'error': u'Unparsed content "eff = lEff {" '
+                                  u'from line 4 column 1 '
+                                  u'to line 4 column 13'},
+                    ],
+                },
+                'summary': {
+                    None: {
+                        'changed': 1,
+                        'changed_w': 1,
+                        'missing': 2,
+                        'missing_w': 2,
+                        'errors': 3
+                    }
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(os.path.exists(mergepath))
+
+        p = getParser(mergepath)
+        p.readFile(mergepath)
+        merged_entities, merged_map = p.parse()
+        self.assertEqual([e.key for e in merged_entities], ["foo"])
+        # foo should be l10n
+        p.readFile(self.l10n)
+        l10n_entities, l10n_map = p.parse()
+        self.assertEqual(
+            merged_entities[merged_map['foo']],
+            l10n_entities[l10n_map['foo']])
+
+    def testMismatchingAttributes(self):
+        self.reference("""
+foo = Foo
+bar = Bar
+  .tender = Attribute value
+eff = Eff
+""")
+        self.localized("""\
+foo = lFoo
+  .obsolete = attr
+bar = lBar
+eff = lEff
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {
+                    'l10n.ftl': [
+                            {
+                                'error':
+                                    u'Obsolete attribute: '
+                                    'obsolete at line 2, column 3 for foo'
+                            },
+                            {
+                                'error':
+                                    u'Missing attribute: tender at line 3,'
+                                    ' column 1 for bar',
+                            },
+                    ],
+                },
+                'summary': {
+                    None: {'changed': 3, 'changed_w': 5, 'errors': 2}
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(os.path.exists(mergepath))
+
+        p = getParser(mergepath)
+        p.readFile(mergepath)
+        merged_entities, merged_map = p.parse()
+        self.assertEqual([e.key for e in merged_entities], ["eff"])
+        # eff should be l10n
+        p.readFile(self.l10n)
+        l10n_entities, l10n_map = p.parse()
+        self.assertEqual(
+            merged_entities[merged_map['eff']],
+            l10n_entities[l10n_map['eff']])
+
+    def testMismatchingValues(self):
+        self.reference("""
+foo = Foo
+  .foottr = something
+bar
+  .tender = Attribute value
+""")
+        self.localized("""\
+foo
+  .foottr = attr
+bar = lBar
+  .tender = localized
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {
+                    'l10n.ftl': [
+                        {
+                            'error':
+                                u'Missing value at line 1, column 1 for foo'
+                        },
+                        {
+                            'error':
+                                u'Obsolete value at line 3, column 7 for bar',
+                        },
+                    ]
+                },
+                'summary': {
+                    None: {'changed': 2, 'changed_w': 4, 'errors': 2}
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(os.path.exists(mergepath))
+
+        p = getParser(mergepath)
+        p.readFile(mergepath)
+        merged_entities, _ = p.parse()
+        self.assertEqual([e.key for e in merged_entities], [])
+
+    def testMissingSection(self):
+        self.reference("""\
+foo = fooVal
+
+[[ Section ]]
+bar = barVal
+""")
+        self.localized("""\
+foo = lFoo
+bar = lBar
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {},
+                'summary': {
+                    None: {
+                        'changed': 2,
+                        'changed_w': 2,
+                    }
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(not os.path.exists(mergepath))
+
+    def testMissingAttachedComment(self):
+        self.reference("""\
+foo = fooVal
+
+// Attached Comment
+bar = barVal
+""")
+        self.localized("""\
+foo = lFoo
+bar = barVal
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {},
+                'summary': {
+                    None: {
+                        'changed': 1,
+                        'changed_w': 1,
+                        'unchanged': 1,
+                        'unchanged_w': 1,
+                    }
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(not os.path.exists(mergepath))
+
+    def testObsoleteStandaloneComment(self):
+        self.reference("""\
+foo = fooVal
+bar = barVal
+""")
+        self.localized("""\
+foo = lFoo
+
+// Standalone Comment
+
+bar = lBar
+""")
+        cc = ContentComparer([Observer()])
+        cc.compare(File(self.ref, "en-reference.ftl", ""),
+                   File(self.l10n, "l10n.ftl", ""),
+                   mozpath.join(self.tmp, "merge", "l10n.ftl"))
+
+        self.assertDictEqual(
+            cc.observers[0].toJSON(),
+            {
+                'details': {},
+                'summary': {
+                    None: {
+                        'changed': 2,
+                        'changed_w': 2,
+                    }
+                }
+            }
+        )
+
+        # validate merge results
+        mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
+        self.assert_(not os.path.exists(mergepath))
+
+
 if __name__ == '__main__':
     unittest.main()
--- a/setup.py
+++ b/setup.py
@@ -40,11 +40,12 @@ setup(name="compare-locales",
       platforms=["any"],
       entry_points={'console_scripts': [
           'compare-locales = compare_locales.commands:CompareLocales.call']},
       packages=['compare_locales', 'compare_locales.tests'],
       package_data={
           'compare_locales.tests': ['data/*.properties', 'data/*.dtd']
       },
       install_requires=[
+          'fluent>=0.4.1',
           'pytoml',
       ],
       test_suite='compare_locales.tests')