Remove Parser.postProcessValue in favor of Entity subclasses. r=Pike
authorStaś Małolepszy <stas@mozilla.com>
Thu, 22 Jun 2017 13:56:55 +0200
changeset 278 868e29f6439c679b15972930e47f90ece2e4cf65
parent 277 c0aa94185f560f909677e8cb11a4ff8e776d1f76
child 279 2b584b77ab240e5c43133e52a3490fcf91663428
child 281 a5d2a6476e667c61fbb8c180df14b2f75f9ef2b1
push id77
push usersmalolepszy@mozilla.com
push dateTue, 27 Jun 2017 17:11:36 +0000
reviewersPike
Remove Parser.postProcessValue in favor of Entity subclasses. r=Pike MozReview-Commit-ID: 1ns96XkFEQx
compare_locales/checks.py
compare_locales/compare.py
compare_locales/parser.py
compare_locales/tests/test_checks.py
compare_locales/tests/test_ftl.py
compare_locales/tests/test_merge.py
--- a/compare_locales/checks.py
+++ b/compare_locales/checks.py
@@ -5,17 +5,17 @@
 import re
 from difflib import SequenceMatcher
 from xml import sax
 try:
     from cStringIO import StringIO
 except ImportError:
     from StringIO import StringIO
 
-from compare_locales.parser import DTDParser, PropertiesParser
+from compare_locales.parser import DTDParser, PropertiesEntity
 
 
 class Checker(object):
     '''Abstract class to implement checks per file type.
     '''
     pattern = None
 
     @classmethod
@@ -75,19 +75,19 @@ class PropertiesChecker(Checker):
                 return
             if lpats - pats:
                 yield ('error', 0, 'unreplaced variables in l10n',
                        'plural')
                 return
             return
         # check for lost escapes
         raw_val = l10nEnt.raw_val
-        for m in PropertiesParser.escape.finditer(raw_val):
+        for m in PropertiesEntity.escape.finditer(raw_val):
             if m.group('single') and \
-               m.group('single') not in PropertiesParser.known_escapes:
+               m.group('single') not in PropertiesEntity.known_escapes:
                 yield ('warning', m.start(),
                        'unknown escape sequence, \\' + m.group('single'),
                        'escape')
         try:
             refSpecs = self.getPrintfSpecs(refValue)
         except PrintfException:
             refSpecs = []
         if refSpecs:
--- a/compare_locales/compare.py
+++ b/compare_locales/compare.py
@@ -508,17 +508,17 @@ class ContentComparer:
                     obsolete += 1
             else:
                 # entity found in both ref and l10n, check for changed
                 refent = ref[0][ref[1][entity]]
                 l10nent = l10n_entities[l10n_map[entity]]
                 if self.keyRE.search(entity):
                     keys += 1
                 else:
-                    if refent == l10nent:
+                    if refent.equals(l10nent):
                         self.doUnchanged(l10nent)
                         unchanged += 1
                         unchanged_w += refent.count_words()
                     else:
                         self.doChanged(ref_file, refent, l10nent)
                         changed += 1
                         changed_w += refent.count_words()
                         # run checks:
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -36,27 +36,26 @@ class EntityBase(object):
     3: entity key (name)
     4: entity value
     5: post white space
                                                  <--[1]
     <!ENTITY key "value">
 
     <-------[2]--------->
     '''
-    def __init__(self, ctx, pp, pre_comment,
+    def __init__(self, ctx, pre_comment,
                  span, pre_ws_span, def_span,
                  key_span, val_span, post_span):
         self.ctx = ctx
         self.span = span
         self.pre_ws_span = pre_ws_span
         self.def_span = def_span
         self.key_span = key_span
         self.val_span = val_span
         self.post_span = post_span
-        self.pp = pp
         self.pre_comment = pre_comment
         pass
 
     def position(self, offset=0):
         """Get the 1-based line and column of the character
         with given offset into the Entity.
 
         If offset is negative, return the end of the Entity.
@@ -88,32 +87,29 @@ class EntityBase(object):
         return self.ctx.contents[self.pre_ws_span[0]:self.pre_ws_span[1]]
 
     def get_def(self):
         return self.ctx.contents[self.def_span[0]:self.def_span[1]]
 
     def get_key(self):
         return self.ctx.contents[self.key_span[0]:self.key_span[1]]
 
-    def get_val(self):
-        return self.pp(self.ctx.contents[self.val_span[0]:self.val_span[1]])
-
     def get_raw_val(self):
         return self.ctx.contents[self.val_span[0]:self.val_span[1]]
 
     def get_post(self):
         return self.ctx.contents[self.post_span[0]:self.post_span[1]]
 
     # getters
 
     all = property(get_all)
     pre_ws = property(get_pre_ws)
     definition = property(get_def)
     key = property(get_key)
-    val = property(get_val)
+    val = property(get_raw_val)
     raw_val = property(get_raw_val)
     post = property(get_post)
 
     def __repr__(self):
         return self.key
 
     re_br = re.compile('<br\s*/?>', re.U)
     re_sgml = re.compile('</?\w+.*?>', re.U | re.M)
@@ -121,45 +117,32 @@ class EntityBase(object):
     def count_words(self):
         """Count the words in an English string.
         Replace a couple of xml markup to make that safer, too.
         """
         value = self.re_br.sub(u'\n', self.val)
         value = self.re_sgml.sub(u'', value)
         return len(value.split())
 
-    def __eq__(self, other):
+    def equals(self, other):
         return self.key == other.key and self.val == other.val
 
 
 class Entity(EntityBase):
-    def value_position(self, offset=0):
-        # DTDChecker already returns tuples of (line, col) positions
-        if isinstance(offset, tuple):
-            line_pos, col_pos = offset
-            line, col = super(Entity, self).value_position()
-            if line_pos == 1:
-                col = col + col_pos
-            else:
-                col = col_pos
-                line += line_pos - 1
-            return line, col
-        else:
-            return super(Entity, self).value_position(offset)
+    pass
 
 
 class Comment(EntityBase):
     def __init__(self, ctx, span, pre_ws_span, def_span,
                  post_span):
         self.ctx = ctx
         self.span = span
         self.pre_ws_span = pre_ws_span
         self.def_span = def_span
         self.post_span = post_span
-        self.pp = lambda v: v
 
     @property
     def key(self):
         return None
 
     @property
     def val(self):
         return None
@@ -211,17 +194,16 @@ class Whitespace(EntityBase):
     '''Entity-like object representing an empty file with whitespace,
     if allowed
     '''
     def __init__(self, ctx, span):
         self.ctx = ctx
         self.key_span = self.val_span = self.span = span
         self.def_span = self.pre_ws_span = (span[0], span[0])
         self.post_span = (span[1], span[1])
-        self.pp = lambda v: v
 
     def __repr__(self):
         return self.raw_val
 
 
 class Parser(object):
     capabilities = CAN_SKIP | CAN_MERGE
     tail = re.compile('\s+\Z')
@@ -270,33 +252,30 @@ class Parser(object):
     def parse(self):
         l = []
         m = {}
         for e in self:
             m[e.key] = len(l)
             l.append(e)
         return (l, m)
 
-    def postProcessValue(self, val):
-        return val
-
     def __iter__(self):
         return self.walk(onlyEntities=True)
 
     def walk(self, onlyEntities=False):
         if not self.ctx:
             # loading file failed, or we just didn't load anything
             return
         ctx = self.ctx
         contents = ctx.contents
         offset = 0
         entity, offset = self.getEntity(ctx, offset)
         while entity:
             if (not onlyEntities or
-                    type(entity) is Entity or
+                    isinstance(entity, Entity) or
                     type(entity) is Junk):
                 yield entity
             entity, offset = self.getEntity(ctx, offset)
         if len(contents) > offset:
             yield Junk(ctx, (offset, len(contents)))
 
     def getEntity(self, ctx, offset):
         m = self.reKey.match(ctx.contents, offset)
@@ -323,17 +302,17 @@ class Parser(object):
                 return (Whitespace(ctx, (offset, white_end)), white_end)
             else:
                 return (None, offset)
         return (Junk(ctx, (offset, junkend)), junkend)
 
     def createEntity(self, ctx, m):
         pre_comment = unicode(self.last_comment) if self.last_comment else ''
         self.last_comment = ''
-        return Entity(ctx, self.postProcessValue, pre_comment,
+        return Entity(ctx, pre_comment,
                       *[m.span(i) for i in xrange(6)])
 
 
 def getParser(path):
     for item in __constructors:
         if re.search(item[0], path):
             return item[1]
     raise UserWarning("Cannot find Parser")
@@ -348,16 +327,32 @@ def getParser(path):
 # 6: post comment (and white space) in the same line (dtd only)
 #                                            <--[1]
 # <!-- pre comments -->                      <--[2]
 # <!ENTITY key "value"> <!-- comment -->
 #
 # <-------[3]---------><------[6]------>
 
 
+class DTDEntity(Entity):
+    def value_position(self, offset=0):
+        # DTDChecker already returns tuples of (line, col) positions
+        if isinstance(offset, tuple):
+            line_pos, col_pos = offset
+            line, col = super(DTDEntity, self).value_position()
+            if line_pos == 1:
+                col = col + col_pos
+            else:
+                col = col_pos
+                line += line_pos - 1
+            return line, col
+        else:
+            return super(DTDEntity, self).value_position(offset)
+
+
 class DTDParser(Parser):
     # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
     # ":" | [A-Z] | "_" | [a-z] |
     # [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
     # | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
     # [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
     # [#x10000-#xEFFFF]
     CharMinusDash = u'\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
@@ -395,37 +390,50 @@ class DTDParser(Parser):
         if offset is 0 and self.reHeader.match(ctx.contents):
             offset += 1
         entity, inneroffset = Parser.getEntity(self, ctx, offset)
         if (entity and isinstance(entity, Junk)) or entity is None:
             m = self.rePE.match(ctx.contents, offset)
             if m:
                 inneroffset = m.end()
                 self.last_comment = ''
-                entity = Entity(ctx, self.postProcessValue, '',
-                                *[m.span(i) for i in xrange(6)])
+                entity = DTDEntity(ctx, '', *[m.span(i) for i in xrange(6)])
         return (entity, inneroffset)
 
     def createEntity(self, ctx, m):
         valspan = m.span('val')
         valspan = (valspan[0]+1, valspan[1]-1)
         pre_comment = unicode(self.last_comment) if self.last_comment else ''
         self.last_comment = ''
-        return Entity(ctx, self.postProcessValue, pre_comment,
-                      m.span(),
-                      m.span('pre'),
-                      m.span('entity'), m.span('key'), valspan,
-                      m.span('post'))
+        return DTDEntity(ctx, pre_comment,
+                         m.span(),
+                         m.span('pre'),
+                         m.span('entity'), m.span('key'), valspan,
+                         m.span('post'))
 
 
-class PropertiesParser(Parser):
+class PropertiesEntity(Entity):
     escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                         '(?P<nl>\n\s*)|(?P<single>.))', re.M)
     known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
 
+    @property
+    def val(self):
+        def unescape(m):
+            found = m.groupdict()
+            if found['uni']:
+                return unichr(int(found['uni'][1:], 16))
+            if found['nl']:
+                return ''
+            return self.known_escapes.get(found['single'], found['single'])
+
+        return self.escape.sub(unescape, self.raw_val)
+
+
+class PropertiesParser(Parser):
     def __init__(self):
         self.reKey = re.compile('^(\s*)'
                                 '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
         self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
         self._escapedEnd = re.compile(r'\\+$')
         self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
         Parser.__init__(self)
 
@@ -464,50 +472,38 @@ class PropertiesParser(Parser):
             # strip trailing whitespace
             ws = self._trailingWS.search(contents, startline)
             if ws:
                 endval = ws.start()
                 offset = ws.end()
             pre_comment = (unicode(self.last_comment) if self.last_comment
                            else '')
             self.last_comment = ''
-            entity = Entity(ctx, self.postProcessValue, pre_comment,
-                            (m.start(), offset),   # full span
-                            m.span(1),  # leading whitespan
-                            (m.start(2), offset),   # entity def span
-                            m.span(2),   # key span
-                            (m.end(), endval),   # value span
-                            (offset, offset))  # post comment span, empty
+            entity = PropertiesEntity(
+                ctx, pre_comment,
+                (m.start(), offset),   # full span
+                m.span(1),  # leading whitespan
+                (m.start(2), offset),   # entity def span
+                m.span(2),   # key span
+                (m.end(), endval),   # value span
+                (offset, offset))  # post comment span, empty
             return (entity, offset)
         return self.getTrailing(ctx, offset, self.reKey, self.reComment)
 
-    def postProcessValue(self, val):
-
-        def unescape(m):
-            found = m.groupdict()
-            if found['uni']:
-                return unichr(int(found['uni'][1:], 16))
-            if found['nl']:
-                return ''
-            return self.known_escapes.get(found['single'], found['single'])
-        val = self.escape.sub(unescape, val)
-        return val
-
 
 class DefinesInstruction(EntityBase):
     '''Entity-like object representing processing instructions in inc files
     '''
     def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
         self.ctx = ctx
         self.span = span
         self.pre_ws_span = pre_ws_span
         self.def_span = def_span
         self.key_span = self.val_span = val_span
         self.post_span = post_span
-        self.pp = lambda v: v
 
     def __repr__(self):
         return self.raw_val
 
 
 class DefinesParser(Parser):
     # can't merge, #unfilter needs to be the last item, which we don't support
     capabilities = CAN_COPY
@@ -553,17 +549,16 @@ class IniSection(EntityBase):
     '''
     def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
         self.ctx = ctx
         self.span = span
         self.pre_ws_span = pre_ws_span
         self.def_span = def_span
         self.key_span = self.val_span = val_span
         self.post_span = post_span
-        self.pp = lambda v: v
 
     def __repr__(self):
         return self.raw_val
 
 
 class IniParser(Parser):
     '''
     Parse files of the form:
@@ -623,36 +618,32 @@ class FluentEntity(Entity):
 
         if entry.value is not None:
             self.val_span = (entry.value.span.start, entry.value.span.end)
         else:
             self.val_span = (0, 0)
 
         self.entry = entry
 
-    def pp(self, value):
-        # XXX Normalize whitespace?
-        return value
-
     _word_count = None
 
     def count_words(self):
         if self._word_count is None:
             self._word_count = 0
 
             def count_words(node):
                 if isinstance(node, ftl.TextElement):
                     self._word_count += len(node.value.split())
                 return node
 
             self.entry.traverse(count_words)
 
         return self._word_count
 
-    def __eq__(self, other):
+    def equals(self, other):
         return self.entry.equals(
             other.entry, ignored_fields=self.ignored_fields)
 
     # Positions yielded by FluentChecker.check are absolute offsets from the
     # beginning of the file.  This is different from the base Checker behavior
     # which yields offsets from the beginning of the current entity's value.
     def position(self, pos=None):
         if pos is None:
--- a/compare_locales/tests/test_checks.py
+++ b/compare_locales/tests/test_checks.py
@@ -1,17 +1,17 @@
 # -*- coding: utf-8 -*-
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import unittest
 
 from compare_locales.checks import getChecker
-from compare_locales.parser import getParser, Parser, Entity
+from compare_locales.parser import getParser, Parser, DTDEntity
 from compare_locales.paths import File
 
 
 class BaseHelper(unittest.TestCase):
     file = None
     refContent = None
 
     def setUp(self):
@@ -235,26 +235,27 @@ class TestAndroid(unittest.TestCase):
     """
     apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \
                u"\\u0027, or use \u2019, or put string in quotes."
     quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \
                u"\\u0022, or put string in apostrophes."
 
     def getEntity(self, v):
         ctx = Parser.Context(v)
-        return Entity(ctx, lambda s: s, '', (0, len(v)), (), (), (),
-                      (0, len(v)), ())
+        return DTDEntity(
+            ctx, '', (0, len(v)), (), (), (), (0, len(v)), ())
 
     def getDTDEntity(self, v):
         v = v.replace('"', '&quot;')
         ctx = Parser.Context('<!ENTITY foo "%s">' % v)
-        return Entity(ctx,
-                      lambda s: s, '',
-                      (0, len(v) + 16), (), (), (9, 12),
-                      (14, len(v) + 14), ())
+        return DTDEntity(
+            ctx,
+            '',
+            (0, len(v) + 16), (), (), (9, 12),
+            (14, len(v) + 14), ())
 
     def test_android_dtd(self):
         """Testing the actual android checks. The logic is involved,
         so this is a lot of nitty gritty detail tests.
         """
         f = File("embedding/android/strings.dtd", "strings.dtd",
                  "embedding/android")
         checker = getChecker(f, extra_tests=['android-dtd'])
--- a/compare_locales/tests/test_ftl.py
+++ b/compare_locales/tests/test_ftl.py
@@ -16,29 +16,29 @@ class TestFluentParser(ParserTestMixin, 
         source = 'progress = Progress: { NUMBER($num, style: "percent") }.'
 
         self.parser.readContents(source)
         [ent1] = list(self.parser)
 
         self.parser.readContents(source)
         [ent2] = list(self.parser)
 
-        self.assertEqual(ent1, ent2)
+        self.assertTrue(ent1.equals(ent2))
 
     def test_equality_different_whitespace(self):
         source1 = 'foo = { $arg }'
         source2 = 'foo = {    $arg    }'
 
         self.parser.readContents(source1)
         [ent1] = list(self.parser)
 
         self.parser.readContents(source2)
         [ent2] = list(self.parser)
 
-        self.assertEqual(ent1, ent2)
+        self.assertTrue(ent1.equals(ent2))
 
     def test_word_count(self):
         self.parser.readContents('''\
 a = One
 b = One two three
 c = One { $arg } two
 d =
     One { $arg ->
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -471,22 +471,23 @@ eff = lEff {
         # validate merge results
         mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
         self.assert_(os.path.exists(mergepath))
 
         p = getParser(mergepath)
         p.readFile(mergepath)
         merged_entities, merged_map = p.parse()
         self.assertEqual([e.key for e in merged_entities], ["foo"])
+        merged_foo = merged_entities[merged_map['foo']]
+
         # foo should be l10n
         p.readFile(self.l10n)
         l10n_entities, l10n_map = p.parse()
-        self.assertEqual(
-            merged_entities[merged_map['foo']],
-            l10n_entities[l10n_map['foo']])
+        l10n_foo = l10n_entities[l10n_map['foo']]
+        self.assertTrue(merged_foo.equals(l10n_foo))
 
     def testMismatchingAttributes(self):
         self.reference("""
 foo = Foo
 bar = Bar
   .tender = Attribute value
 eff = Eff
 """)
@@ -527,22 +528,23 @@ eff = lEff
         # validate merge results
         mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl")
         self.assert_(os.path.exists(mergepath))
 
         p = getParser(mergepath)
         p.readFile(mergepath)
         merged_entities, merged_map = p.parse()
         self.assertEqual([e.key for e in merged_entities], ["eff"])
+        merged_eff = merged_entities[merged_map['eff']]
+
         # eff should be l10n
         p.readFile(self.l10n)
         l10n_entities, l10n_map = p.parse()
-        self.assertEqual(
-            merged_entities[merged_map['eff']],
-            l10n_entities[l10n_map['eff']])
+        l10n_eff = l10n_entities[l10n_map['eff']]
+        self.assertTrue(merged_eff.equals(l10n_eff))
 
     def testMismatchingValues(self):
         self.reference("""
 foo = Foo
   .foottr = something
 bar
   .tender = Attribute value
 """)