bug 1310980, part 2: fix PropertiesParser draft
author: Axel Hecht <axel@pike.org>
Mon, 24 Oct 2016 15:26:21 +0200
changeset 143 60bb42f6bf9eb7f802ed37ffdae162b5903c63f8
parent 142 db503092c7006fc987f1ac861c8cd29bc1a75a2a
child 144 cdfb15a3c6829bb28ce17893e255b94ad201e072
push id: 30
push user: axel@mozilla.com
push date: Mon, 24 Oct 2016 14:52:08 +0000
bugs: 1310980
bug 1310980, part 2: fix PropertiesParser MozReview-Commit-ID: 9YYi8vGqRUZ
compare_locales/parser.py
compare_locales/tests/__init__.py
compare_locales/tests/test_properties.py
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -348,67 +348,88 @@ class DTDParser(Parser):
 
 class PropertiesParser(Parser):
     escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                         '(?P<nl>\n\s*)|(?P<single>.))', re.M)
     known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
 
     def __init__(self):
         self.reKey = re.compile('^(\s*)'
-                                '((?:[#!].*?\n\s*)*)'
                                 '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
         self.reHeader = re.compile('^\s*([#!].*\s*)+')
-        self.reFooter = re.compile('\s*([#!].*\s*)*$')
+        self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
         self._escapedEnd = re.compile(r'\\+$')
-        self._trailingWS = re.compile(r'[ \t]*$')
+        self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
         Parser.__init__(self)
 
     def getHeader(self, contents, offset):
         header = ''
         h = self.reHeader.match(contents, offset)
         if h:
             candidate = h.group()
             if 'http://mozilla.org/MPL/2.0/' in candidate or \
                     'LICENSE BLOCK' in candidate:
                 header = candidate
                 offset = h.end()
         return (header, offset)
 
     def getEntity(self, ctx, offset):
         # overwritten to parse values line by line
         contents = ctx.contents
+        m = self.reComment.match(contents, offset)
+        if m:
+            spans = [m.span(i) for i in xrange(3)]
+            start_trailing = offset = m.end()
+            while offset < len(contents):
+                m = self._trailingWS.match(contents, offset)
+                if not m:
+                    break
+                offset = m.end()
+            spans.append((start_trailing, offset))
+            return (Comment(ctx, *spans), offset)
         m = self.reKey.match(contents, offset)
         if m:
-            offset = m.end()
+            startline = offset = m.end()
             while True:
                 endval = nextline = contents.find('\n', offset)
                 if nextline == -1:
                     endval = offset = len(contents)
                     break
                 # is newline escaped?
                 _e = self._escapedEnd.search(contents, offset, nextline)
                 offset = nextline + 1
                 if _e is None:
                     break
                 # backslashes at end of line, if 2*n, not escaped
                 if len(_e.group()) % 2 == 0:
                     break
+                startline = offset
             # strip trailing whitespace
-            ws = self._trailingWS.search(contents, m.end(), offset)
+            ws = self._trailingWS.search(contents, startline)
             if ws:
-                endval -= ws.end() - ws.start()
-            entity = Entity(ctx, self.postProcessValue,
+                endval = ws.start()
+                offset = ws.end()
+            pre_comment = (unicode(self.last_comment) if self.last_comment
+                           else '')
+            self.last_comment = ''
+            entity = Entity(ctx, self.postProcessValue, pre_comment,
                             (m.start(), offset),   # full span
                             m.span(1),  # leading whitespan
-                            (m.start(3), offset),   # entity def span
-                            m.span(3),   # key span
+                            (m.start(2), offset),   # entity def span
+                            m.span(2),   # key span
                             (m.end(), endval),   # value span
                             (offset, offset))  # post comment span, empty
             return (entity, offset)
-        m = self.reKey.search(contents, offset)
+        mkey = self.reKey.search(ctx.contents, offset)
+        mcomment = self.reComment.search(ctx.contents, offset)
+        m = None
+        if mkey and mcomment:
+            m = mkey if mkey.start() < mcomment.start() else mcomment
+        else:
+            m = mkey if mkey else mcomment
         if m:
             # we didn't match, but search, so there's junk between offset
             # and start. We'll match() on the next turn
             junkend = m.start()
             return (Junk(ctx, (offset, junkend)), junkend)
         return (None, offset)
 
     def postProcessValue(self, val):
--- a/compare_locales/tests/__init__.py
+++ b/compare_locales/tests/__init__.py
@@ -33,17 +33,17 @@ class ParserTestMixin():
         return testcontent
 
     def _test(self, content, refs):
         '''Helper to test the parser.
         Compares the result of parsing content with the given list
         of reference keys and values.
         '''
         self.parser.readContents(content)
-        entities = [entity for entity in self.parser]
+        entities = list(self.parser.walk())
         for entity, ref in izip_longest(entities, refs):
             self.assertTrue(entity, 'excess reference entity ' + unicode(ref))
             self.assertTrue(ref, 'excess parsed entity ' + unicode(entity))
             if type(entity) is parser.Entity:
                 self.assertEqual(entity.key, ref[0])
                 self.assertEqual(entity.val, ref[1])
             else:
                 self.assertEqual(type(entity).__name__, ref[0])
--- a/compare_locales/tests/test_properties.py
+++ b/compare_locales/tests/test_properties.py
@@ -58,18 +58,17 @@ and an end''', (('bar', 'one line with a
 
     def test_license_header(self):
         self._test('''\
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 foo=value
-''', (('foo', 'value'),))
-        self.assert_('MPL' in self.parser.header)
+''', (('Comment', 'MPL'), ('foo', 'value')))
 
     def test_escapes(self):
         self.parser.readContents(r'''
 # unicode escapes
 zero = some \unicode
 one = \u0
 two = \u41
 three = \u042
@@ -83,13 +82,63 @@ seven = \n\r\t\\
             self.assertEqual(e.val, r)
 
     def test_trailing_comment(self):
         self._test('''first = string
 second = string
 
 #
 #commented out
-''', (('first', 'string'), ('second', 'string')))
+''', (('first', 'string'), ('second', 'string'),
+            ('Comment', 'commented out')))
+
+    def test_trailing_newlines(self):
+        self._test('''\
+foo = bar
+
+\x20\x20
+  ''', (('foo', 'bar'),))
+
+    def test_just_comments(self):
+        self._test('''\
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# LOCALIZATION NOTE These strings are used inside the Promise debugger
+# which is available as a panel in the Debugger.
+''', (('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+
+    def test_just_comments_without_trailing_newline(self):
+        self._test('''\
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+# LOCALIZATION NOTE These strings are used inside the Promise debugger
+# which is available as a panel in the Debugger.''', (
+            ('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+
+    def test_trailing_comment_and_newlines(self):
+        self._test('''\
+# LOCALIZATION NOTE These strings are used inside the Promise debugger
+# which is available as a panel in the Debugger.
+
+
+
+''',  (('Comment', 'LOCALIZATION NOTE'),))
+
+    def test_positions(self):
+        self.parser.readContents('''\
+one = value
+two = other \\
+escaped value
+''')
+        one, two = list(self.parser)
+        self.assertEqual(one.position(), (1, 1))
+        self.assertEqual(one.value_position(), (1, 7))
+        self.assertEqual(two.position(), (2, 1))
+        self.assertEqual(two.value_position(), (2, 7))
+        self.assertEqual(two.value_position(-1), (3, 14))
+        self.assertEqual(two.value_position(10), (3, 3))
 
 if __name__ == '__main__':
     unittest.main()