bug 1310980, part 2: fix PropertiesParser
MozReview-Commit-ID: 9YYi8vGqRUZ
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -348,67 +348,88 @@ class DTDParser(Parser):
class PropertiesParser(Parser):
escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
'(?P<nl>\n\s*)|(?P<single>.))', re.M)
known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
def __init__(self):
self.reKey = re.compile('^(\s*)'
- '((?:[#!].*?\n\s*)*)'
'([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
self.reHeader = re.compile('^\s*([#!].*\s*)+')
- self.reFooter = re.compile('\s*([#!].*\s*)*$')
+ self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
self._escapedEnd = re.compile(r'\\+$')
- self._trailingWS = re.compile(r'[ \t]*$')
+ self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
Parser.__init__(self)
def getHeader(self, contents, offset):
header = ''
h = self.reHeader.match(contents, offset)
if h:
candidate = h.group()
if 'http://mozilla.org/MPL/2.0/' in candidate or \
'LICENSE BLOCK' in candidate:
header = candidate
offset = h.end()
return (header, offset)
def getEntity(self, ctx, offset):
# overwritten to parse values line by line
contents = ctx.contents
+ m = self.reComment.match(contents, offset)
+ if m:
+ spans = [m.span(i) for i in xrange(3)]
+ start_trailing = offset = m.end()
+ while offset < len(contents):
+ m = self._trailingWS.match(contents, offset)
+ if not m:
+ break
+ offset = m.end()
+ spans.append((start_trailing, offset))
+ return (Comment(ctx, *spans), offset)
m = self.reKey.match(contents, offset)
if m:
- offset = m.end()
+ startline = offset = m.end()
while True:
endval = nextline = contents.find('\n', offset)
if nextline == -1:
endval = offset = len(contents)
break
# is newline escaped?
_e = self._escapedEnd.search(contents, offset, nextline)
offset = nextline + 1
if _e is None:
break
# backslashes at end of line, if 2*n, not escaped
if len(_e.group()) % 2 == 0:
break
+ startline = offset
# strip trailing whitespace
- ws = self._trailingWS.search(contents, m.end(), offset)
+ ws = self._trailingWS.search(contents, startline)
if ws:
- endval -= ws.end() - ws.start()
- entity = Entity(ctx, self.postProcessValue,
+ endval = ws.start()
+ offset = ws.end()
+ pre_comment = (unicode(self.last_comment) if self.last_comment
+ else '')
+ self.last_comment = ''
+ entity = Entity(ctx, self.postProcessValue, pre_comment,
(m.start(), offset), # full span
m.span(1), # leading whitespan
- (m.start(3), offset), # entity def span
- m.span(3), # key span
+ (m.start(2), offset), # entity def span
+ m.span(2), # key span
(m.end(), endval), # value span
(offset, offset)) # post comment span, empty
return (entity, offset)
- m = self.reKey.search(contents, offset)
+ mkey = self.reKey.search(ctx.contents, offset)
+ mcomment = self.reComment.search(ctx.contents, offset)
+ m = None
+ if mkey and mcomment:
+ m = mkey if mkey.start() < mcomment.start() else mcomment
+ else:
+ m = mkey if mkey else mcomment
if m:
# we didn't match, but search, so there's junk between offset
# and start. We'll match() on the next turn
junkend = m.start()
return (Junk(ctx, (offset, junkend)), junkend)
return (None, offset)
def postProcessValue(self, val):
--- a/compare_locales/tests/__init__.py
+++ b/compare_locales/tests/__init__.py
@@ -33,17 +33,17 @@ class ParserTestMixin():
return testcontent
def _test(self, content, refs):
'''Helper to test the parser.
Compares the result of parsing content with the given list
of reference keys and values.
'''
self.parser.readContents(content)
- entities = [entity for entity in self.parser]
+ entities = list(self.parser.walk())
for entity, ref in izip_longest(entities, refs):
self.assertTrue(entity, 'excess reference entity ' + unicode(ref))
self.assertTrue(ref, 'excess parsed entity ' + unicode(entity))
if type(entity) is parser.Entity:
self.assertEqual(entity.key, ref[0])
self.assertEqual(entity.val, ref[1])
else:
self.assertEqual(type(entity).__name__, ref[0])
--- a/compare_locales/tests/test_properties.py
+++ b/compare_locales/tests/test_properties.py
@@ -58,18 +58,17 @@ and an end''', (('bar', 'one line with a
def test_license_header(self):
self._test('''\
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
foo=value
-''', (('foo', 'value'),))
- self.assert_('MPL' in self.parser.header)
+''', (('Comment', 'MPL'), ('foo', 'value')))
def test_escapes(self):
self.parser.readContents(r'''
# unicode escapes
zero = some \unicode
one = \u0
two = \u41
three = \u042
@@ -83,13 +82,63 @@ seven = \n\r\t\\
self.assertEqual(e.val, r)
def test_trailing_comment(self):
self._test('''first = string
second = string
#
#commented out
-''', (('first', 'string'), ('second', 'string')))
+''', (('first', 'string'), ('second', 'string'),
+ ('Comment', 'commented out')))
+
+ def test_trailing_newlines(self):
+ self._test('''\
+foo = bar
+
+\x20\x20
+ ''', (('foo', 'bar'),))
+
+ def test_just_comments(self):
+ self._test('''\
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# LOCALIZATION NOTE These strings are used inside the Promise debugger
+# which is available as a panel in the Debugger.
+''', (('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+
+ def test_just_comments_without_trailing_newline(self):
+ self._test('''\
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# LOCALIZATION NOTE These strings are used inside the Promise debugger
+# which is available as a panel in the Debugger.''', (
+ ('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+
+ def test_trailing_comment_and_newlines(self):
+ self._test('''\
+# LOCALIZATION NOTE These strings are used inside the Promise debugger
+# which is available as a panel in the Debugger.
+
+
+
+''', (('Comment', 'LOCALIZATION NOTE'),))
+
+ def test_positions(self):
+ self.parser.readContents('''\
+one = value
+two = other \\
+escaped value
+''')
+ one, two = list(self.parser)
+ self.assertEqual(one.position(), (1, 1))
+ self.assertEqual(one.value_position(), (1, 7))
+ self.assertEqual(two.position(), (2, 1))
+ self.assertEqual(two.value_position(), (2, 7))
+ self.assertEqual(two.value_position(-1), (3, 14))
+ self.assertEqual(two.value_position(10), (3, 3))
if __name__ == '__main__':
unittest.main()