Bug 1399059 - Yield Whitespace between Entities in Parser.walk(only_localizable=False). r=Pike
author Staś Małolepszy <stas@mozilla.com>
Fri, 22 Sep 2017 13:25:28 +0200
changeset 338 80dd1357ec658356cd54430d73ba715c542b8a7f
parent 319 90d69a0aae60eef880a96f052f2a4bd38247e231
push id 109
push user smalolepszy@mozilla.com
push date Sun, 24 Sep 2017 13:26:28 +0000
reviewers Pike
bugs 1399059
Bug 1399059 - Yield Whitespace between Entities in Parser.walk(only_localizable=False). r=Pike MozReview-Commit-ID: 7ob1sBVwO37
compare_locales/parser.py
compare_locales/tests/test_checks.py
compare_locales/tests/test_defines.py
compare_locales/tests/test_dtd.py
compare_locales/tests/test_ftl.py
compare_locales/tests/test_ini.py
compare_locales/tests/test_merge.py
compare_locales/tests/test_properties.py
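
For orientation, a minimal sketch (not part of the changeset) of the behaviour
this patch introduces, assuming a checkout with the patch applied and the
Python 2 API shown in the parser.py diff below:

    from compare_locales.parser import getParser

    p = getParser('foo.dtd')
    p.readContents('<!ENTITY foo "Foo">\n<!ENTITY bar "Bar">\n')

    # Plain iteration keeps the old contract: only localizable entries
    # (Entity and Junk instances) are yielded.
    print([e.key for e in p])                # ['foo', 'bar']

    # walk(only_localizable=False) now also yields the Whitespace found
    # between entities instead of swallowing it.
    for node in p.walk(only_localizable=False):
        print('%s %r' % (type(node).__name__, node.all))
    # Expected, roughly: DTDEntity, Whitespace '\n', DTDEntity, Whitespace '\n'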
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -27,38 +27,30 @@ CAN_SKIP = 2
 CAN_MERGE = 4
 
 
 class EntityBase(object):
     '''
     Abstraction layer for a localizable entity.
     Currently supported are grammars of the form:
 
-    1: pre white space
-    2: entity definition
-    3: entity key (name)
-    4: entity value
-    5: post white space
-                                                 <--[1]
+    1: entity definition
+    2: entity key (name)
+    3: entity value
+
     <!ENTITY key "value">
 
-    <-------[2]--------->
+    <--- definition ---->
     '''
-    def __init__(self, ctx, pre_comment,
-                 span, pre_ws_span, def_span,
-                 key_span, val_span, post_span):
+    def __init__(self, ctx, pre_comment, span, key_span, val_span):
         self.ctx = ctx
         self.span = span
-        self.pre_ws_span = pre_ws_span
-        self.def_span = def_span
         self.key_span = key_span
         self.val_span = val_span
-        self.post_span = post_span
         self.pre_comment = pre_comment
-        pass
 
     def position(self, offset=0):
         """Get the 1-based line and column of the character
         with given offset into the Entity.
 
         If offset is negative, return the end of the Entity.
         """
         if offset < 0:
@@ -79,40 +71,28 @@ class EntityBase(object):
             pos = self.val_span[0] + offset
         return self.ctx.lines(pos)[0]
 
     # getter helpers
 
     def get_all(self):
         return self.ctx.contents[self.span[0]:self.span[1]]
 
-    def get_pre_ws(self):
-        return self.ctx.contents[self.pre_ws_span[0]:self.pre_ws_span[1]]
-
-    def get_def(self):
-        return self.ctx.contents[self.def_span[0]:self.def_span[1]]
-
     def get_key(self):
         return self.ctx.contents[self.key_span[0]:self.key_span[1]]
 
     def get_raw_val(self):
         return self.ctx.contents[self.val_span[0]:self.val_span[1]]
 
-    def get_post(self):
-        return self.ctx.contents[self.post_span[0]:self.post_span[1]]
-
     # getters
 
     all = property(get_all)
-    pre_ws = property(get_pre_ws)
-    definition = property(get_def)
     key = property(get_key)
     val = property(get_raw_val)
     raw_val = property(get_raw_val)
-    post = property(get_post)
 
     def __repr__(self):
         return self.key
 
     re_br = re.compile('<br\s*/?>', re.U)
     re_sgml = re.compile('</?\w+.*?>', re.U | re.M)
 
     def count_words(self):
@@ -127,23 +107,19 @@ class EntityBase(object):
         return self.key == other.key and self.val == other.val
 
 
 class Entity(EntityBase):
     pass
 
 
 class Comment(EntityBase):
-    def __init__(self, ctx, span, pre_ws_span, def_span,
-                 post_span):
+    def __init__(self, ctx, span):
         self.ctx = ctx
         self.span = span
-        self.pre_ws_span = pre_ws_span
-        self.def_span = def_span
-        self.post_span = post_span
 
     @property
     def key(self):
         return None
 
     @property
     def val(self):
         return None
@@ -158,17 +134,16 @@ class Junk(object):
     This way, we can signal bad content as stuff we don't understand.
     And then either fix that, or report real bugs in localizations.
     '''
     junkid = 0
 
     def __init__(self, ctx, span):
         self.ctx = ctx
         self.span = span
-        self.pre_ws = self.definition = self.post = ''
         self.__class__.junkid += 1
         self.key = '_junk_%d_%d-%d' % (self.__class__.junkid, span[0], span[1])
 
     def position(self, offset=0):
         """Get the 1-based line and column of the character
         with given offset into the Entity.
 
         If offset is negative, return the end of the Entity.
@@ -192,32 +167,32 @@ class Junk(object):
 
 
 class Whitespace(EntityBase):
     '''Entity-like object representing an empty file with whitespace,
     if allowed
     '''
     def __init__(self, ctx, span):
         self.ctx = ctx
-        self.key_span = self.val_span = self.span = span
-        self.def_span = self.pre_ws_span = (span[0], span[0])
-        self.post_span = (span[1], span[1])
+        self.span = self.key_span = self.val_span = span
 
     def __repr__(self):
         return self.raw_val
 
 
 class Parser(object):
     capabilities = CAN_SKIP | CAN_MERGE
-    tail = re.compile('\s+\Z')
+    reWhitespace = re.compile('\s+', re.M)
 
     class Context(object):
         "Fixture for content and line numbers"
         def __init__(self, contents):
             self.contents = contents
+            # Subclasses may use bitmasks to keep state.
+            self.state = 0
             self._lines = None
 
         def lines(self, *positions):
             # return line and column tuples, 1-based
             if self._lines is None:
                 nl = re.compile('\n', re.M)
                 self._lines = [m.end()
                                for m in nl.finditer(self.contents)]
@@ -254,97 +229,77 @@ class Parser(object):
         l = []
         m = {}
         for e in self:
             m[e.key] = len(l)
             l.append(e)
         return (l, m)
 
     def __iter__(self):
-        return self.walk(onlyEntities=True)
+        return self.walk(only_localizable=True)
 
-    def walk(self, onlyEntities=False):
+    def walk(self, only_localizable=False):
         if not self.ctx:
             # loading file failed, or we just didn't load anything
             return
         ctx = self.ctx
         contents = ctx.contents
-        offset = 0
-        entity, offset = self.getEntity(ctx, offset)
-        while entity:
-            if (not onlyEntities or
-                    isinstance(entity, Entity) or
-                    type(entity) is Junk):
+
+        next_offset = 0
+        while next_offset < len(contents):
+            entity = self.getNext(ctx, next_offset)
+
+            if isinstance(entity, (Entity, Junk)):
                 yield entity
-            entity, offset = self.getEntity(ctx, offset)
-        if len(contents) > offset:
-            yield Junk(ctx, (offset, len(contents)))
+            elif not only_localizable:
+                yield entity
+
+            next_offset = entity.span[1]
 
-    def getEntity(self, ctx, offset):
+    def getNext(self, ctx, offset):
+        m = self.reWhitespace.match(ctx.contents, offset)
+        if m:
+            return Whitespace(ctx, m.span())
         m = self.reKey.match(ctx.contents, offset)
         if m:
-            offset = m.end()
-            entity = self.createEntity(ctx, m)
-            return (entity, offset)
+            return self.createEntity(ctx, m)
         m = self.reComment.match(ctx.contents, offset)
         if m:
-            offset = m.end()
-            self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
-            return (self.last_comment, offset)
-        return self.getTrailing(ctx, offset, self.reKey, self.reComment)
+            self.last_comment = Comment(ctx, m.span())
+            return self.last_comment
+        return self.getJunk(ctx, offset, self.reKey, self.reComment)
 
-    def getTrailing(self, ctx, offset, *expressions):
+    def getJunk(self, ctx, offset, *expressions):
         junkend = None
         for exp in expressions:
             m = exp.search(ctx.contents, offset)
             if m:
                 junkend = min(junkend, m.start()) if junkend else m.start()
-        if junkend is None:
-            if self.tail.match(ctx.contents, offset):
-                white_end = len(ctx.contents)
-                return (Whitespace(ctx, (offset, white_end)), white_end)
-            else:
-                return (None, offset)
-        return (Junk(ctx, (offset, junkend)), junkend)
+        return Junk(ctx, (offset, junkend or len(ctx.contents)))
 
     def createEntity(self, ctx, m):
         pre_comment = self.last_comment
         self.last_comment = None
-        return Entity(ctx, pre_comment,
-                      *[m.span(i) for i in xrange(6)])
+        return Entity(ctx, pre_comment, m.span(), m.span('key'), m.span('val'))
 
     @classmethod
     def findDuplicates(cls, entities):
         found = Counter(entity.key for entity in entities)
         for entity_id, cnt in found.items():
             if cnt > 1:
                 yield '{} occurs {} times'.format(entity_id, cnt)
 
 
 def getParser(path):
     for item in __constructors:
         if re.search(item[0], path):
             return item[1]
     raise UserWarning("Cannot find Parser")
 
 
-# Subgroups of the match will:
-# 1: pre white space
-# 2: pre comments
-# 3: entity definition
-# 4: entity key (name)
-# 5: entity value
-# 6: post comment (and white space) in the same line (dtd only)
-#                                            <--[1]
-# <!-- pre comments -->                      <--[2]
-# <!ENTITY key "value"> <!-- comment -->
-#
-# <-------[3]---------><------[6]------>
-
-
 class DTDEntity(Entity):
     def value_position(self, offset=0):
         # DTDChecker already returns tuples of (line, col) positions
         if isinstance(offset, tuple):
             line_pos, col_pos = offset
             line, col = super(DTDEntity, self).value_position()
             if line_pos == 1:
                 col = col + col_pos
@@ -369,58 +324,54 @@ class DTDParser(Parser):
         u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
         u'\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
     # + \U00010000-\U000EFFFF seems to be unsupported in python
 
     # NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
     #     [#x0300-#x036F] | [#x203F-#x2040]
     NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
     Name = '[' + NameStartChar + '][' + NameChar + ']*'
-    reKey = re.compile('(?:(?P<pre>\s*)(?P<entity><!ENTITY\s+(?P<key>' + Name +
-                       ')\s+(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>)'
-                       '(?P<post>\s+)?)',
+    reKey = re.compile('<!ENTITY\s+(?P<key>' + Name + ')\s+'
+                       '(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>',
                        re.DOTALL | re.M)
     # add BOM to DTDs, details in bug 435002
     reHeader = re.compile(u'^\ufeff')
-    reComment = re.compile('(\s*)(<!--(-?[%s])*?-->)(\s*)' % CharMinusDash,
+    reComment = re.compile('<!--(?P<val>-?[%s])*?-->' % CharMinusDash,
                            re.S)
-    rePE = re.compile(u'(?:(\s*)'
-                      u'(<!ENTITY\s+%\s+(' + Name +
-                      u')\s+SYSTEM\s+(\"[^\"]*\"|\'[^\']*\')\s*>\s*%' + Name +
-                      u';)([ \t]*(?:' + XmlComment + u'\s*)*\n?)?)')
+    rePE = re.compile(u'<!ENTITY\s+%\s+(?P<key>' + Name + ')\s+'
+                      u'SYSTEM\s+(?P<val>\"[^\"]*\"|\'[^\']*\')\s*>\s*'
+                      u'%' + Name + ';'
+                      u'(?:[ \t]*(?:' + XmlComment + u'\s*)*\n?)?')
 
-    def getEntity(self, ctx, offset):
+    def getNext(self, ctx, offset):
         '''
-        Overload Parser.getEntity to special-case ParsedEntities.
+        Overload Parser.getNext to special-case ParsedEntities.
         Just check for a parsed entity if that method claims junk.
 
         <!ENTITY % foo SYSTEM "url">
         %foo;
         '''
         if offset is 0 and self.reHeader.match(ctx.contents):
             offset += 1
-        entity, inneroffset = Parser.getEntity(self, ctx, offset)
+        entity = Parser.getNext(self, ctx, offset)
         if (entity and isinstance(entity, Junk)) or entity is None:
             m = self.rePE.match(ctx.contents, offset)
             if m:
-                inneroffset = m.end()
                 self.last_comment = None
-                entity = DTDEntity(ctx, '', *[m.span(i) for i in xrange(6)])
-        return (entity, inneroffset)
+                entity = DTDEntity(
+                    ctx, '', m.span(), m.span('key'), m.span('val'))
+        return entity
 
     def createEntity(self, ctx, m):
         valspan = m.span('val')
         valspan = (valspan[0]+1, valspan[1]-1)
         pre_comment = self.last_comment
         self.last_comment = None
         return DTDEntity(ctx, pre_comment,
-                         m.span(),
-                         m.span('pre'),
-                         m.span('entity'), m.span('key'), valspan,
-                         m.span('post'))
+                         m.span(), m.span('key'), valspan)
 
 
 class PropertiesEntity(Entity):
     escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                         '(?P<nl>\n\s*)|(?P<single>.))', re.M)
     known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
 
     @property
@@ -433,38 +384,36 @@ class PropertiesEntity(Entity):
                 return ''
             return self.known_escapes.get(found['single'], found['single'])
 
         return self.escape.sub(unescape, self.raw_val)
 
 
 class PropertiesParser(Parser):
     def __init__(self):
-        self.reKey = re.compile('^(\s*)'
-                                '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
-        self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
+        self.reKey = re.compile(
+            '(?P<key>[^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
+        self.reComment = re.compile('(?:[#!][^\n]*\n)*(?:[#!][^\n]*)', re.M)
         self._escapedEnd = re.compile(r'\\+$')
-        self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
+        self._trailingWS = re.compile(r'\s*[\n\Z]', re.M)
         Parser.__init__(self)
 
-    def getEntity(self, ctx, offset):
+    def getNext(self, ctx, offset):
         # overwritten to parse values line by line
         contents = ctx.contents
+
+        m = self.reWhitespace.match(contents, offset)
+        if m:
+            return Whitespace(ctx, m.span())
+
         m = self.reComment.match(contents, offset)
         if m:
-            spans = [m.span(i) for i in xrange(3)]
-            start_trailing = offset = m.end()
-            while offset < len(contents):
-                m = self._trailingWS.match(contents, offset)
-                if not m:
-                    break
-                offset = m.end()
-            spans.append((start_trailing, offset))
-            self.last_comment = Comment(ctx, *spans)
-            return (self.last_comment, offset)
+            self.last_comment = Comment(ctx, m.span())
+            return self.last_comment
+
         m = self.reKey.match(contents, offset)
         if m:
             startline = offset = m.end()
             while True:
                 endval = nextline = contents.find('\n', offset)
                 if nextline == -1:
                     endval = offset = len(contents)
                     break
@@ -472,147 +421,140 @@ class PropertiesParser(Parser):
                 _e = self._escapedEnd.search(contents, offset, nextline)
                 offset = nextline + 1
                 if _e is None:
                     break
                 # backslashes at end of line, if 2*n, not escaped
                 if len(_e.group()) % 2 == 0:
                     break
                 startline = offset
+
             # strip trailing whitespace
             ws = self._trailingWS.search(contents, startline)
             if ws:
                 endval = ws.start()
-                offset = ws.end()
+
             pre_comment = self.last_comment
             self.last_comment = None
             entity = PropertiesEntity(
                 ctx, pre_comment,
-                (m.start(), offset),   # full span
-                m.span(1),  # leading whitespan
-                (m.start(2), offset),   # entity def span
-                m.span(2),   # key span
-                (m.end(), endval),   # value span
-                (offset, offset))  # post comment span, empty
-            return (entity, offset)
-        return self.getTrailing(ctx, offset, self.reKey, self.reComment)
+                (m.start(), endval),   # full span
+                m.span('key'),
+                (m.end(), endval))   # value span
+            return entity
+
+        return self.getJunk(ctx, offset, self.reKey, self.reComment)
 
 
 class DefinesInstruction(EntityBase):
     '''Entity-like object representing processing instructions in inc files
     '''
-    def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
+    def __init__(self, ctx, span, val_span):
         self.ctx = ctx
         self.span = span
-        self.pre_ws_span = pre_ws_span
-        self.def_span = def_span
         self.key_span = self.val_span = val_span
-        self.post_span = post_span
 
     def __repr__(self):
         return self.raw_val
 
 
 class DefinesParser(Parser):
     # can't merge, #unfilter needs to be the last item, which we don't support
     capabilities = CAN_COPY
-    tail = re.compile(r'(?!)')  # never match
+    reWhitespace = re.compile('\n+', re.M)
+
+    EMPTY_LINES = 1 << 0
+    PAST_FIRST_LINE = 1 << 1
 
     def __init__(self):
-        self.reComment = re.compile(
-            '((?:[ \t]*\n)*)'
-            '((?:^# .*?(?:\n|\Z))+)'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reKey = re.compile('((?:[ \t]*\n)*)'
-                                '(#define[ \t]+(\w+)(?:[ \t](.*?))?(?:\n|\Z))'
-                                '((?:[ \t]*(?:\n|\Z))*)',
-                                re.M)
-        self.rePI = re.compile('((?:[ \t]*\n)*)'
-                               '(#(\w+)[ \t]+(.*?)(?:\n|\Z))'
-                               '((?:[ \t]*(?:\n|\Z))*)',
-                               re.M)
+        self.reComment = re.compile('(?:^# .*?\n)*(?:^# [^\n]*)', re.M)
+        self.reKey = re.compile(
+            '#define[ \t]+(?P<key>\w+)(?:[ \t]+(?P<val>[^\n]*))?', re.M)
+        self.rePI = re.compile('#(?P<val>\w+[ \t]+[^\n]+)', re.M)
         Parser.__init__(self)
 
-    def getEntity(self, ctx, offset):
+    def getNext(self, ctx, offset):
         contents = ctx.contents
+
+        m = self.reWhitespace.match(contents, offset)
+        if m:
+            if ctx.state & self.EMPTY_LINES:
+                return Whitespace(ctx, m.span())
+            if ctx.state & self.PAST_FIRST_LINE and len(m.group()) == 1:
+                return Whitespace(ctx, m.span())
+            else:
+                return Junk(ctx, m.span())
+
+        # We're not in the first line anymore.
+        ctx.state |= self.PAST_FIRST_LINE
+
         m = self.reComment.match(contents, offset)
         if m:
-            offset = m.end()
-            self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
-            return (self.last_comment, offset)
+            self.last_comment = Comment(ctx, m.span())
+            return self.last_comment
         m = self.reKey.match(contents, offset)
         if m:
-            offset = m.end()
-            return (self.createEntity(ctx, m), offset)
+            return self.createEntity(ctx, m)
         m = self.rePI.match(contents, offset)
         if m:
-            offset = m.end()
-            return (DefinesInstruction(ctx, *[m.span(i) for i in xrange(5)]),
-                    offset)
-        return self.getTrailing(ctx, offset,
-                                self.reComment, self.reKey, self.rePI)
+            instr = DefinesInstruction(ctx, m.span(), m.span('val'))
+            if instr.val == 'filter emptyLines':
+                ctx.state |= self.EMPTY_LINES
+            if instr.val == 'unfilter emptyLines':
+                ctx.state &= ~ self.EMPTY_LINES
+            return instr
+        return self.getJunk(
+            ctx, offset, self.reComment, self.reKey, self.rePI)
 
 
 class IniSection(EntityBase):
     '''Entity-like object representing sections in ini files
     '''
-    def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
+    def __init__(self, ctx, span, val_span):
         self.ctx = ctx
         self.span = span
-        self.pre_ws_span = pre_ws_span
-        self.def_span = def_span
         self.key_span = self.val_span = val_span
-        self.post_span = post_span
 
     def __repr__(self):
         return self.raw_val
 
 
 class IniParser(Parser):
     '''
     Parse files of the form:
     # initial comment
     [cat]
     whitespace*
     #comment
     string=value
     ...
     '''
     def __init__(self):
-        self.reComment = re.compile(
-            '((?:[ \t]*\n)*)'
-            '((?:^[;#].*?(?:\n|\Z))+)'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reSection = re.compile(
-            '((?:[ \t]*\n)*)'
-            '(\[(.*?)\])'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reKey = re.compile(
-            '((?:[ \t]*\n)*)'
-            '((.+?)=(.*))'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
+        self.reComment = re.compile('(?:^[;#][^\n]*\n)*(?:^[;#][^\n]*)', re.M)
+        self.reSection = re.compile('\[(?P<val>.*?)\]', re.M)
+        self.reKey = re.compile('(?P<key>.+?)=(?P<val>.*)', re.M)
         Parser.__init__(self)
 
-    def getEntity(self, ctx, offset):
+    def getNext(self, ctx, offset):
         contents = ctx.contents
+        m = self.reWhitespace.match(contents, offset)
+        if m:
+            return Whitespace(ctx, m.span())
         m = self.reComment.match(contents, offset)
         if m:
-            offset = m.end()
-            self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
-            return (self.last_comment, offset)
+            self.last_comment = Comment(ctx, m.span())
+            return self.last_comment
         m = self.reSection.match(contents, offset)
         if m:
-            offset = m.end()
-            return (IniSection(ctx, *[m.span(i) for i in xrange(5)]), offset)
+            return IniSection(ctx, m.span(), m.span('val'))
         m = self.reKey.match(contents, offset)
         if m:
-            offset = m.end()
-            return (self.createEntity(ctx, m), offset)
-        return self.getTrailing(ctx, offset,
-                                self.reComment, self.reSection, self.reKey)
+            return self.createEntity(ctx, m)
+        return self.getJunk(
+            ctx, offset, self.reComment, self.reSection, self.reKey)
 
 
 class FluentAttribute(EntityBase):
     ignored_fields = ['span']
 
     def __init__(self, entity, attr_node):
         self.ctx = entity.ctx
         self.attr = attr_node
@@ -686,32 +628,45 @@ class FluentEntity(Entity):
 
 class FluentParser(Parser):
     capabilities = CAN_SKIP
 
     def __init__(self):
         super(FluentParser, self).__init__()
         self.ftl_parser = FTLParser()
 
-    def walk(self, onlyEntities=False):
+    def walk(self, only_localizable=False):
         if not self.ctx:
             # loading file failed, or we just didn't load anything
             return
         resource = self.ftl_parser.parse(self.ctx.contents)
+        last_span_end = resource.comment.span.end if resource.comment else 0
         for entry in resource.body:
+            if not only_localizable:
+                if entry.span.start > last_span_end:
+                    yield Whitespace(
+                        self.ctx, (last_span_end, entry.span.start))
+
             if isinstance(entry, ftl.Message):
                 yield FluentEntity(self.ctx, entry)
             elif isinstance(entry, ftl.Junk):
                 start = entry.span.start
                 end = entry.span.end
                 # strip leading whitespace
                 start += re.match('\s*', entry.content).end()
                 # strip trailing whitespace
                 ws, we = re.search('\s*$', entry.content).span()
                 end -= we - ws
                 yield Junk(self.ctx, (start, end))
 
+            last_span_end = entry.span.end
+
+        if not only_localizable:
+            eof_offset = len(self.ctx.contents)
+            if eof_offset > last_span_end:
+                yield Whitespace(self.ctx, (last_span_end, eof_offset))
+
 
 __constructors = [('\\.dtd$', DTDParser()),
                   ('\\.properties$', PropertiesParser()),
                   ('\\.ini$', IniParser()),
                   ('\\.inc$', DefinesParser()),
                   ('\\.ftl$', FluentParser())]
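
The DefinesParser above uses ctx.state to decide how to classify blank lines:
between #filter emptyLines and #unfilter emptyLines a run of empty lines is
yielded as Whitespace, elsewhere it is reported as Junk (a single newline
between defines stays Whitespace). A small sketch of that behaviour, again
assuming the patch is applied (it mirrors the test_defines.py cases below):

    from compare_locales.parser import getParser

    p = getParser('defines.inc')
    p.readContents('#filter emptyLines\n'
                   '\n'
                   '#define MOZ_LANGPACK_CREATOR mozilla.org\n'
                   '#unfilter emptyLines')
    for node in p.walk(only_localizable=False):
        print('%s %r' % (type(node).__name__, node.all))
    # Expected, roughly: DefinesInstruction, Whitespace '\n\n', Entity,
    # Whitespace '\n', DefinesInstruction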
--- a/compare_locales/tests/test_checks.py
+++ b/compare_locales/tests/test_checks.py
@@ -223,29 +223,26 @@ class TestAndroid(unittest.TestCase):
     Make sure we're hitting our extra rules only if
     we're passing in a DTD file in the embedding/android module.
     """
     apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \
                u"\\u0027, or use \u2019, or put string in quotes."
     quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \
                u"\\u0022, or put string in apostrophes."
 
-    def getEntity(self, v):
+    def getNext(self, v):
         ctx = Parser.Context(v)
         return DTDEntity(
-            ctx, '', (0, len(v)), (), (), (), (0, len(v)), ())
+            ctx, '', (0, len(v)), (), (0, len(v)))
 
     def getDTDEntity(self, v):
         v = v.replace('"', '&quot;')
         ctx = Parser.Context('<!ENTITY foo "%s">' % v)
         return DTDEntity(
-            ctx,
-            '',
-            (0, len(v) + 16), (), (), (9, 12),
-            (14, len(v) + 14), ())
+            ctx, '', (0, len(v) + 16), (9, 12), (14, len(v) + 14))
 
     def test_android_dtd(self):
         """Testing the actual android checks. The logic is involved,
         so this is a lot of nitty gritty detail tests.
         """
         f = File("embedding/android/strings.dtd", "strings.dtd",
                  "embedding/android")
         checker = getChecker(f, extra_tests=['android-dtd'])
@@ -321,33 +318,33 @@ class TestAndroid(unittest.TestCase):
                          (('error', 14, 'truncated \\uXXXX escape',
                            'android'),))
 
     def test_android_prop(self):
         f = File("embedding/android/strings.properties", "strings.properties",
                  "embedding/android")
         checker = getChecker(f, extra_tests=['android-dtd'])
         # good plain string
-        ref = self.getEntity("plain string")
-        l10n = self.getEntity("plain localized string")
+        ref = self.getNext("plain string")
+        l10n = self.getNext("plain localized string")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          ())
         # no dtd warning
-        ref = self.getEntity("plain string")
-        l10n = self.getEntity("plain localized string &ref;")
+        ref = self.getNext("plain string")
+        l10n = self.getNext("plain localized string &ref;")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          ())
         # no report on stray ampersand
-        ref = self.getEntity("plain string")
-        l10n = self.getEntity("plain localized string with apos: '")
+        ref = self.getNext("plain string")
+        l10n = self.getNext("plain localized string with apos: '")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          ())
         # report on bad printf
-        ref = self.getEntity("string with %s")
-        l10n = self.getEntity("string with %S")
+        ref = self.getNext("string with %s")
+        l10n = self.getNext("string with %S")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          (('error', 0, 'argument 1 `S` should be `s`',
                            'printf'),))
 
     def test_non_android_dtd(self):
         f = File("browser/strings.dtd", "strings.dtd", "browser")
         checker = getChecker(f)
         # good string
--- a/compare_locales/tests/test_defines.py
+++ b/compare_locales/tests/test_defines.py
@@ -6,81 +6,160 @@
 import unittest
 
 from compare_locales.tests import ParserTestMixin
 
 
 mpl2 = '''\
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
-# You can obtain one at http://mozilla.org/MPL/2.0/.
-'''
+# You can obtain one at http://mozilla.org/MPL/2.0/.'''
 
 
 class TestDefinesParser(ParserTestMixin, unittest.TestCase):
 
     filename = 'defines.inc'
 
     def testBrowser(self):
-        self._test(mpl2 + '''#filter emptyLines
+        self._test(mpl2 + '''
+#filter emptyLines
 
 #define MOZ_LANGPACK_CREATOR mozilla.org
 
 # If non-English locales wish to credit multiple contributors, uncomment this
 # variable definition and use the format specified.
 # #define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Joe Solon</em:contributor>
 
 #unfilter emptyLines
 
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n'),
             ('DefinesInstruction', 'filter emptyLines'),
+            ('Whitespace', '\n\n'),
             ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n\n'),
             ('Comment', '#define'),
-            ('DefinesInstruction', 'unfilter emptyLines')))
+            ('Whitespace', '\n\n'),
+            ('DefinesInstruction', 'unfilter emptyLines'),
+            ('Junk', '\n\n')))
 
     def testBrowserWithContributors(self):
-        self._test(mpl2 + '''#filter emptyLines
+        self._test(mpl2 + '''
+#filter emptyLines
 
 #define MOZ_LANGPACK_CREATOR mozilla.org
 
 # If non-English locales wish to credit multiple contributors, uncomment this
 # variable definition and use the format specified.
 #define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Joe Solon</em:contributor>
 
 #unfilter emptyLines
 
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n'),
             ('DefinesInstruction', 'filter emptyLines'),
+            ('Whitespace', '\n\n'),
             ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n\n'),
             ('Comment', 'non-English'),
+            ('Whitespace', '\n'),
             ('MOZ_LANGPACK_CONTRIBUTORS',
              '<em:contributor>Joe Solon</em:contributor>'),
-            ('DefinesInstruction', 'unfilter emptyLines')))
+            ('Whitespace', '\n\n'),
+            ('DefinesInstruction', 'unfilter emptyLines'),
+            ('Junk', '\n\n')))
 
     def testCommentWithNonAsciiCharacters(self):
-        self._test(mpl2 + '''#filter emptyLines
+        self._test(mpl2 + '''
+#filter emptyLines
 
 # e.g. #define seamonkey_l10n <DT><A HREF="urn:foo">SeaMonkey v češtině</a>
 #define seamonkey_l10n_long
 
 #unfilter emptyLines
 
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n'),
             ('DefinesInstruction', 'filter emptyLines'),
+            ('Whitespace', '\n\n'),
             ('Comment', u'češtině'),
+            ('Whitespace', '\n'),
             ('seamonkey_l10n_long', ''),
+            ('Whitespace', '\n\n'),
+            ('DefinesInstruction', 'unfilter emptyLines'),
+            ('Junk', '\n\n')))
+
+    def test_no_empty_lines(self):
+        self._test('''#define MOZ_LANGPACK_CREATOR mozilla.org
+#define MOZ_LANGPACK_CREATOR mozilla.org
+''', (
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n')))
+
+    def test_empty_line_between(self):
+        self._test('''#define MOZ_LANGPACK_CREATOR mozilla.org
+
+#define MOZ_LANGPACK_CREATOR mozilla.org
+''', (
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Junk', '\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n')))
+
+    def test_empty_line_at_the_beginning(self):
+        self._test('''
+#define MOZ_LANGPACK_CREATOR mozilla.org
+#define MOZ_LANGPACK_CREATOR mozilla.org
+''', (
+            ('Junk', '\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n')))
+
+    def test_filter_empty_lines(self):
+        self._test('''#filter emptyLines
+
+#define MOZ_LANGPACK_CREATOR mozilla.org
+#define MOZ_LANGPACK_CREATOR mozilla.org
+#unfilter emptyLines''', (
+            ('DefinesInstruction', 'filter emptyLines'),
+            ('Whitespace', '\n\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n'),
             ('DefinesInstruction', 'unfilter emptyLines')))
 
+    def test_unfilter_empty_lines_with_trailing(self):
+        self._test('''#filter emptyLines
+
+#define MOZ_LANGPACK_CREATOR mozilla.org
+#define MOZ_LANGPACK_CREATOR mozilla.org
+#unfilter emptyLines
+''', (
+            ('DefinesInstruction', 'filter emptyLines'),
+            ('Whitespace', '\n\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n'),
+            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+            ('Whitespace', '\n'),
+            ('DefinesInstruction', 'unfilter emptyLines'),
+            ('Whitespace', '\n')))
+
     def testToolkit(self):
         self._test('''#define MOZ_LANG_TITLE English (US)
 ''', (
-            ('MOZ_LANG_TITLE', 'English (US)'),))
+            ('MOZ_LANG_TITLE', 'English (US)'),
+            ('Whitespace', '\n')))
 
     def testToolkitEmpty(self):
         self._test('', tuple())
 
     def test_empty_file(self):
         '''Test that empty files generate errors
 
         defines.inc are interesting that way, as their
--- a/compare_locales/tests/test_dtd.py
+++ b/compare_locales/tests/test_dtd.py
@@ -25,84 +25,94 @@ class TestDTD(ParserTestMixin, unittest.
 <!ENTITY good.two "two">
 <!ENTITY bad.two "bad "quoted" word">
 <!ENTITY good.three "three">
 <!ENTITY good.four "good ' quote">
 <!ENTITY good.five "good 'quoted' word">
 '''
     quoteRef = (
         ('good.one', 'one'),
-        ('Junk', '<!ENTITY bad.one "bad " quote">'),
+        ('Whitespace', '\n'),
+        ('Junk', '<!ENTITY bad.one "bad " quote">\n'),
         ('good.two', 'two'),
-        ('Junk', '<!ENTITY bad.two "bad "quoted" word">'),
+        ('Whitespace', '\n'),
+        ('Junk', '<!ENTITY bad.two "bad "quoted" word">\n'),
         ('good.three', 'three'),
+        ('Whitespace', '\n'),
         ('good.four', 'good \' quote'),
-        ('good.five', 'good \'quoted\' word'),)
+        ('Whitespace', '\n'),
+        ('good.five', 'good \'quoted\' word'),
+        ('Whitespace', '\n'),)
 
     def test_quotes(self):
         self._test(self.quoteContent, self.quoteRef)
 
     def test_apos(self):
         qr = re.compile('[\'"]', re.M)
 
         def quot2apos(s):
             return qr.sub(lambda m: m.group(0) == '"' and "'" or '"', s)
 
         self._test(quot2apos(self.quoteContent),
-                   map(lambda t: (t[0], quot2apos(t[1])), self.quoteRef))
+                   ((ref[0], quot2apos(ref[1])) for ref in self.quoteRef))
 
     def test_parsed_ref(self):
         self._test('''<!ENTITY % fooDTD SYSTEM "chrome://brand.dtd">
   %fooDTD;
 ''',
                    (('fooDTD', '"chrome://brand.dtd"'),))
 
     def test_trailing_comment(self):
         self._test('''<!ENTITY first "string">
 <!ENTITY second "string">
 <!--
 <!ENTITY commented "out">
 -->
 ''',
-                   (('first', 'string'), ('second', 'string'),
-                    ('Comment', 'out')))
+                   (
+                       ('first', 'string'),
+                       ('Whitespace', '\n'),
+                       ('second', 'string'),
+                       ('Whitespace', '\n'),
+                       ('Comment', 'out'),
+                       ('Whitespace', '\n')))
 
     def test_license_header(self):
         p = parser.getParser('foo.dtd')
         p.readContents(self.resource('triple-license.dtd'))
         entities = list(p.walk())
         self.assert_(isinstance(entities[0], parser.Comment))
         self.assertIn('MPL', entities[0].all)
-        e = entities[1]
+        e = entities[2]
         self.assert_(isinstance(e, parser.Entity))
         self.assertEqual(e.key, 'foo')
         self.assertEqual(e.val, 'value')
-        self.assertEqual(len(entities), 2)
+        self.assertEqual(len(entities), 4)
         p.readContents('''\
 <!-- This Source Code Form is subject to the terms of the Mozilla Public
    - License, v. 2.0. If a copy of the MPL was not distributed with this file,
    - You can obtain one at http://mozilla.org/MPL/2.0/.  -->
 <!ENTITY foo "value">
 ''')
         entities = list(p.walk())
         self.assert_(isinstance(entities[0], parser.Comment))
         self.assertIn('MPL', entities[0].all)
-        e = entities[1]
+        e = entities[2]
         self.assert_(isinstance(e, parser.Entity))
         self.assertEqual(e.key, 'foo')
         self.assertEqual(e.val, 'value')
-        self.assertEqual(len(entities), 2)
+        self.assertEqual(len(entities), 4)
 
     def testBOM(self):
         self._test(u'\ufeff<!ENTITY foo.label "stuff">'.encode('utf-8'),
                    (('foo.label', 'stuff'),))
 
     def test_trailing_whitespace(self):
         self._test('<!ENTITY foo.label "stuff">\n  \n',
-                   (('foo.label', 'stuff'),))
+                   (('foo.label', 'stuff'), ('Whitespace', '\n  \n')))
 
     def test_unicode_comment(self):
         self._test('<!-- \xe5\x8f\x96 -->',
                    (('Comment', u'\u53d6'),))
 
     def test_empty_file(self):
         self._test('', tuple())
         self._test('\n', (('Whitespace', '\n'),))
@@ -113,30 +123,22 @@ class TestDTD(ParserTestMixin, unittest.
         self.parser.readContents('''\
 <!ENTITY one  "value">
 <!ENTITY  two "other
 escaped value">
 ''')
         one, two = list(self.parser)
         self.assertEqual(one.position(), (1, 1))
         self.assertEqual(one.value_position(), (1, 16))
-        self.assertEqual(one.position(-1), (2, 1))
+        self.assertEqual(one.position(-1), (1, 23))
         self.assertEqual(two.position(), (2, 1))
         self.assertEqual(two.value_position(), (2, 16))
         self.assertEqual(two.value_position(-1), (3, 14))
         self.assertEqual(two.value_position(10), (3, 5))
 
-    def test_post(self):
-        self.parser.readContents('<!ENTITY a "a"><!ENTITY b "b">')
-        a, b = list(self.parser)
-        self.assertEqual(a.post, '')
-        self.parser.readContents('<!ENTITY a "a"> <!ENTITY b "b">')
-        a, b = list(self.parser)
-        self.assertEqual(a.post, ' ')
-
     def test_word_count(self):
         self.parser.readContents('''\
 <!ENTITY a "one">
 <!ENTITY b "one<br>two">
 <!ENTITY c "one<span>word</span>">
 <!ENTITY d "one <a href='foo'>two</a> three">
 ''')
         a, b, c, d = list(self.parser)
--- a/compare_locales/tests/test_ftl.py
+++ b/compare_locales/tests/test_ftl.py
@@ -1,15 +1,16 @@
 # -*- coding: utf-8 -*-
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import unittest
 
+from compare_locales import parser
 from compare_locales.tests import ParserTestMixin
 
 
 class TestFluentParser(ParserTestMixin, unittest.TestCase):
     maxDiff = None
     filename = 'foo.ftl'
 
     def test_equality_same(self):
@@ -126,8 +127,58 @@ abc
         self.assertEqual(abc.key, 'abc')
         self.assertEqual(abc.val, '')
         self.assertEqual(abc.all, 'abc\n    .attr = Attr')
         attributes = list(abc.attributes)
         self.assertEqual(len(attributes), 1)
         attr = attributes[0]
         self.assertEqual(attr.key, 'attr')
         self.assertEqual(attr.val, 'Attr')
+
+    def test_whitespace(self):
+        self.parser.readContents('''\
+// Resource Comment
+
+foo = Foo
+
+// Section Comment
+[[ Section ]]
+
+bar = Bar
+
+// Standalone Comment
+
+// Baz Comment
+baz = Baz
+''')
+        entities = list(self.parser.walk())
+
+        self.assertTrue(isinstance(entities[0], parser.Whitespace))
+        self.assertEqual(entities[0].all, '\n')
+
+        self.assertTrue(isinstance(entities[1], parser.FluentEntity))
+        self.assertEqual(entities[1].val, 'Foo')
+
+        self.assertTrue(isinstance(entities[2], parser.Whitespace))
+        self.assertEqual(entities[2].all, '\n\n')
+
+        # XXX We don't yield Sections yet (bug 1399057).
+
+        self.assertTrue(isinstance(entities[3], parser.Whitespace))
+        self.assertEqual(entities[3].all, '\n')
+
+        self.assertTrue(isinstance(entities[4], parser.FluentEntity))
+        self.assertEqual(entities[4].val, 'Bar')
+
+        self.assertTrue(isinstance(entities[5], parser.Whitespace))
+        self.assertEqual(entities[5].all, '\n\n')
+
+        # XXX We don't yield Comments yet (bug 1399057).
+
+        self.assertTrue(isinstance(entities[6], parser.Whitespace))
+        self.assertEqual(entities[6].all, '\n')
+
+        self.assertTrue(isinstance(entities[7], parser.FluentEntity))
+        self.assertEqual(entities[7].val, 'Baz')
+        self.assertEqual(entities[7].entry.comment.content, 'Baz Comment')
+
+        self.assertTrue(isinstance(entities[8], parser.Whitespace))
+        self.assertEqual(entities[8].all, '\n')
--- a/compare_locales/tests/test_ini.py
+++ b/compare_locales/tests/test_ini.py
@@ -6,133 +6,172 @@
 import unittest
 
 from compare_locales.tests import ParserTestMixin
 
 
 mpl2 = '''\
 ; This Source Code Form is subject to the terms of the Mozilla Public
 ; License, v. 2.0. If a copy of the MPL was not distributed with this file,
-; You can obtain one at http://mozilla.org/MPL/2.0/.
-'''
+; You can obtain one at http://mozilla.org/MPL/2.0/.'''
 
 
 class TestIniParser(ParserTestMixin, unittest.TestCase):
 
     filename = 'foo.ini'
 
     def testSimpleHeader(self):
         self._test('''; This file is in the UTF-8 encoding
 [Strings]
 TitleText=Some Title
 ''', (
             ('Comment', 'UTF-8 encoding'),
+            ('Whitespace', '\n'),
             ('IniSection', 'Strings'),
-            ('TitleText', 'Some Title'),))
+            ('Whitespace', '\n'),
+            ('TitleText', 'Some Title'),
+            ('Whitespace', '\n')))
 
     def testMPL2_Space_UTF(self):
         self._test(mpl2 + '''
+
 ; This file is in the UTF-8 encoding
 [Strings]
 TitleText=Some Title
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('Comment', 'UTF-8'),
+            ('Whitespace', '\n'),
             ('IniSection', 'Strings'),
-            ('TitleText', 'Some Title'),))
+            ('Whitespace', '\n'),
+            ('TitleText', 'Some Title'),
+            ('Whitespace', '\n')))
 
     def testMPL2_Space(self):
         self._test(mpl2 + '''
+
 [Strings]
 TitleText=Some Title
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('IniSection', 'Strings'),
-            ('TitleText', 'Some Title'),))
+            ('Whitespace', '\n'),
+            ('TitleText', 'Some Title'),
+            ('Whitespace', '\n')))
 
     def testMPL2_MultiSpace(self):
-        self._test(mpl2 + '''\
+        self._test(mpl2 + '''
 
 ; more comments
 
 [Strings]
 TitleText=Some Title
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('Comment', 'more comments'),
+            ('Whitespace', '\n\n'),
             ('IniSection', 'Strings'),
-            ('TitleText', 'Some Title'),))
+            ('Whitespace', '\n'),
+            ('TitleText', 'Some Title'),
+            ('Whitespace', '\n')))
 
     def testMPL2_JunkBeforeCategory(self):
-        self._test(mpl2 + '''\
+        self._test(mpl2 + '''
 Junk
 [Strings]
 TitleText=Some Title
 ''', (
             ('Comment', mpl2),
-            ('Junk', 'Junk'),
+            ('Whitespace', '\n'),
+            ('Junk', 'Junk\n'),
             ('IniSection', 'Strings'),
-            ('TitleText', 'Some Title')))
+            ('Whitespace', '\n'),
+            ('TitleText', 'Some Title'),
+            ('Whitespace', '\n')))
 
     def test_TrailingComment(self):
         self._test(mpl2 + '''
+
 [Strings]
 TitleText=Some Title
 ;Stray trailing comment
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('IniSection', 'Strings'),
+            ('Whitespace', '\n'),
             ('TitleText', 'Some Title'),
-            ('Comment', 'Stray trailing')))
+            ('Whitespace', '\n'),
+            ('Comment', 'Stray trailing'),
+            ('Whitespace', '\n')))
 
     def test_SpacedTrailingComments(self):
         self._test(mpl2 + '''
+
 [Strings]
 TitleText=Some Title
 
 ;Stray trailing comment
 ;Second stray comment
 
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('IniSection', 'Strings'),
+            ('Whitespace', '\n'),
             ('TitleText', 'Some Title'),
-            ('Comment', 'Second stray comment')))
+            ('Whitespace', '\n\n'),
+            ('Comment', 'Second stray comment'),
+            ('Whitespace', '\n\n')))
 
     def test_TrailingCommentsAndJunk(self):
         self._test(mpl2 + '''
+
 [Strings]
 TitleText=Some Title
 
 ;Stray trailing comment
 Junk
 ;Second stray comment
 
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('IniSection', 'Strings'),
+            ('Whitespace', '\n'),
             ('TitleText', 'Some Title'),
+            ('Whitespace', '\n\n'),
             ('Comment', 'Stray trailing'),
-            ('Junk', 'Junk'),
-            ('Comment', 'Second stray comment')))
+            ('Whitespace', '\n'),
+            ('Junk', 'Junk\n'),
+            ('Comment', 'Second stray comment'),
+            ('Whitespace', '\n\n')))
 
     def test_JunkInbetweenEntries(self):
         self._test(mpl2 + '''
+
 [Strings]
 TitleText=Some Title
 
 Junk
 
 Good=other string
 ''', (
             ('Comment', mpl2),
+            ('Whitespace', '\n\n'),
             ('IniSection', 'Strings'),
+            ('Whitespace', '\n'),
             ('TitleText', 'Some Title'),
-            ('Junk', 'Junk'),
-            ('Good', 'other string')))
+            ('Whitespace', '\n\n'),
+            ('Junk', 'Junk\n\n'),
+            ('Good', 'other string'),
+            ('Whitespace', '\n')))
 
     def test_empty_file(self):
         self._test('', tuple())
         self._test('\n', (('Whitespace', '\n'),))
         self._test('\n\n', (('Whitespace', '\n\n'),))
         self._test(' \n\n', (('Whitespace', ' \n\n'),))
 
 
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -288,19 +288,19 @@ class TestDTD(unittest.TestCase, Content
                     'missing': 1,
                     'missing_w': 1,
                     'unchanged': 2,
                     'unchanged_w': 2
                 }},
              'details': {
                  'l10n.dtd': [
                      {'error': u'Unparsed content "<!ENTY bar '
-                               u'\'gimmick\'>" '
+                               u'\'gimmick\'>\n" '
                                u'from line 2 column 1 to '
-                               u'line 2 column 22'},
+                               u'line 3 column 1'},
                      {'missingEntity': u'bar'}]
                 }
              })
         mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd")
         self.assertTrue(os.path.isfile(mergefile))
         p = getParser(mergefile)
         p.readFile(mergefile)
         [m, n] = p.parse()
--- a/compare_locales/tests/test_properties.py
+++ b/compare_locales/tests/test_properties.py
@@ -17,21 +17,25 @@ class TestPropertiesParser(ParserTestMix
 two_line = This is the first \
 of two lines
 one_line_trailing = This line ends in \\
 and has junk
 two_lines_triple = This line is one of two and ends in \\\
 and still has another line coming
 ''', (
             ('one_line', 'This is one line'),
+            ('Whitespace', '\n'),
             ('two_line', u'This is the first of two lines'),
+            ('Whitespace', '\n'),
             ('one_line_trailing', u'This line ends in \\'),
+            ('Whitespace', '\n'),
             ('Junk', 'and has junk\n'),
             ('two_lines_triple', 'This line is one of two and ends in \\'
-             'and still has another line coming')))
+             'and still has another line coming'),
+            ('Whitespace', '\n')))
 
     def testProperties(self):
         # port of netwerk/test/PropertiesTest.cpp
         self.parser.readContents(self.resource('test.properties'))
         ref = ['1', '2', '3', '4', '5', '6', '7', '8',
                'this is the first part of a continued line '
                'and here is the 2nd part']
         i = iter(self.parser)
@@ -58,17 +62,21 @@ and an end''', (('bar', 'one line with a
 
     def test_license_header(self):
         self._test('''\
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 foo=value
-''', (('Comment', 'MPL'), ('foo', 'value')))
+''', (
+            ('Comment', 'MPL'),
+            ('Whitespace', '\n\n'),
+            ('foo', 'value'),
+            ('Whitespace', '\n')))
 
     def test_escapes(self):
         self.parser.readContents(r'''
 # unicode escapes
 zero = some \unicode
 one = \u0
 two = \u41
 three = \u042
@@ -82,60 +90,73 @@ seven = \n\r\t\\
             self.assertEqual(e.val, r)
 
     def test_trailing_comment(self):
         self._test('''first = string
 second = string
 
 #
 #commented out
-''', (('first', 'string'), ('second', 'string'),
-            ('Comment', 'commented out')))
+''', (
+            ('first', 'string'),
+            ('Whitespace', '\n'),
+            ('second', 'string'),
+            ('Whitespace', '\n\n'),
+            ('Comment', 'commented out'),
+            ('Whitespace', '\n')))
 
     def test_trailing_newlines(self):
         self._test('''\
 foo = bar
 
 \x20\x20
-  ''', (('foo', 'bar'),))
+  ''', (('foo', 'bar'), ('Whitespace', '\n\n\x20\x20\n ')))
 
     def test_just_comments(self):
         self._test('''\
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # LOCALIZATION NOTE These strings are used inside the Promise debugger
 # which is available as a panel in the Debugger.
-''', (('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+''', (
+            ('Comment', 'MPL'),
+            ('Whitespace', '\n\n'),
+            ('Comment', 'LOCALIZATION NOTE'),
+            ('Whitespace', '\n')))
 
     def test_just_comments_without_trailing_newline(self):
         self._test('''\
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # LOCALIZATION NOTE These strings are used inside the Promise debugger
 # which is available as a panel in the Debugger.''', (
-            ('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+            ('Comment', 'MPL'),
+            ('Whitespace', '\n\n'),
+            ('Comment', 'LOCALIZATION NOTE')))
 
     def test_trailing_comment_and_newlines(self):
         self._test('''\
 # LOCALIZATION NOTE These strings are used inside the Promise debugger
 # which is available as a panel in the Debugger.
 
 
 
-''',  (('Comment', 'LOCALIZATION NOTE'),))
+''',  (
+            ('Comment', 'LOCALIZATION NOTE'),
+            ('Whitespace', '\n\n\n')))
 
     def test_empty_file(self):
         self._test('', tuple())
         self._test('\n', (('Whitespace', '\n'),))
         self._test('\n\n', (('Whitespace', '\n\n'),))
-        self._test(' \n\n', (('Whitespace', ' \n\n'),))
+        self._test(' \n\n', (('Whitespace', '\n\n'),))
 
     def test_positions(self):
         self.parser.readContents('''\
 one = value
 two = other \\
 escaped value
 ''')
         one, two = list(self.parser)