Bug 1399059 - Part 4 - Parser.walk(withWhitespace=False). r?Pike draft
author Staś Małolepszy <stas@mozilla.com>
Wed, 20 Sep 2017 11:32:25 +0200
changeset 330 df70a282f950d7efb65de38e2fa76974a7cf72fb
parent 329 db20f168ef12041571b277d7bfb14582246f4b2a
child 331 0dd7555cbd790ab72dde3873a7365dce313cba9f
push id 105
push user smalolepszy@mozilla.com
push date Thu, 21 Sep 2017 17:02:01 +0000
reviewers Pike
bugs 1399059
Bug 1399059 - Part 4 - Parser.walk(withWhitespace=False). r?Pike

MozReview-Commit-ID: BQw8bnWJgMy
compare_locales/parser.py
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -127,17 +127,17 @@ class Comment(EntityBase):
     def key(self):
         return None
 
     @property
     def val(self):
         return None
 
     def __repr__(self):
-        return self.all
+        return self.definition
 
 
 class Junk(object):
     '''
     An almost-Entity, representing junk data that we didn't parse.
     This way, we can signal bad content as stuff we don't understand.
     And the either fix that, or report real bugs in localizations.
     '''
@@ -175,18 +175,17 @@ class Junk(object):
 
 
 class Whitespace(EntityBase):
     '''Entity-like object representing an empty file with whitespace,
     if allowed
     '''
     def __init__(self, ctx, span):
         self.ctx = ctx
-        self.key_span = self.val_span = self.span = span
-        self.def_span = (span[0], span[0])
+        self.span = self.def_span = self.key_span = self.val_span = span
 
     def __repr__(self):
         return self.raw_val
 
 
 class Parser(object):
     capabilities = CAN_SKIP | CAN_MERGE
     tail = re.compile('\s+\Z')
@@ -238,35 +237,45 @@ class Parser(object):
         for e in self:
             m[e.key] = len(l)
             l.append(e)
         return (l, m)
 
     def __iter__(self):
         return self.walk(onlyEntities=True)
 
-    def walk(self, onlyEntities=False):
+    def walk(self, onlyEntities=False, withWhitespace=False):
         if not self.ctx:
             # loading file failed, or we just didn't load anything
             return
         ctx = self.ctx
         contents = ctx.contents
 
-        offset = 0
-        entity = self.getEntity(ctx, offset)
+        next_entity_offset = 0
+        entity = self.getEntity(ctx, next_entity_offset)
         while entity:
-            offset = entity.span[1]
+            if withWhitespace:
+                def_start = entity.def_span[0]
+                if next_entity_offset < def_start:
+                    yield Whitespace(ctx, (next_entity_offset, def_start))
 
             if (not onlyEntities or isinstance(entity, (Entity, Junk))):
                 yield entity
 
-            entity = self.getEntity(ctx, offset)
+            if withWhitespace:
+                def_end = entity.def_span[1]
+                outer_end = entity.span[1]
+                if def_end < outer_end:
+                    yield Whitespace(ctx, (def_end, outer_end))
 
-        if len(contents) > offset:
-            yield Junk(ctx, (offset, len(contents)))
+            next_entity_offset = entity.span[1]
+            entity = self.getEntity(ctx, next_entity_offset)
+
+        if len(contents) > next_entity_offset:
+            yield Junk(ctx, (next_entity_offset, len(contents)))
 
     def getEntity(self, ctx, offset):
         m = self.reKey.match(ctx.contents, offset)
         if m:
             entity = self.createEntity(ctx, m)
             return entity
         m = self.reComment.match(ctx.contents, offset)
         if m:
@@ -410,19 +419,18 @@ class PropertiesEntity(Entity):
                 return ''
             return self.known_escapes.get(found['single'], found['single'])
 
         return self.escape.sub(unescape, self.raw_val)
 
 
 class PropertiesParser(Parser):
     def __init__(self):
-        self.reKey = re.compile('^\s*'
-                                '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
-        self.reComment = re.compile('\s*(((?:[#!][^\n]*\n?)+))', re.M)
+        self.reKey = re.compile('^\s*([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
+        self.reComment = re.compile('\s*((?:[#!][^\n]*\n?)+)', re.M)
         self._escapedEnd = re.compile(r'\\+$')
         self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
         Parser.__init__(self)
 
     def getEntity(self, ctx, offset):
         # overwritten to parse values line by line
         contents = ctx.contents
         m = self.reComment.match(contents, offset)
@@ -461,17 +469,17 @@ class PropertiesParser(Parser):
             if ws:
                 endval = ws.start()
                 offset = ws.end()
             pre_comment = self.last_comment
             self.last_comment = None
             entity = PropertiesEntity(
                 ctx, pre_comment,
                 (m.start(), offset),   # full span
-                (m.start(1), offset),   # entity def span
+                (m.start(1), endval),   # def span
                 m.span(1),   # key span
                 (m.end(), endval))   # value span
             return entity
         return self.getTrailing(ctx, offset, self.reKey, self.reComment)
 
 
 class DefinesInstruction(EntityBase):
     '''Entity-like object representing processing instructions in inc files