--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -230,33 +230,31 @@ class Parser(object):
l = []
m = {}
for e in self:
m[e.key] = len(l)
l.append(e)
return (l, m)
def __iter__(self):
- return self.walk(onlyEntities=True)
+ return self.walk(only_localizable=True)
- def walk(self, onlyEntities=False, withWhitespace=False):
+ def walk(self, only_localizable=False):
if not self.ctx:
# loading file failed, or we just didn't load anything
return
ctx = self.ctx
contents = ctx.contents
next_entity_offset = 0
entity = self.getNext(ctx, next_entity_offset)
while entity:
- if (isinstance(entity, (Entity, Junk))):
+ if isinstance(entity, (Entity, Junk)):
yield entity
- elif (not onlyEntities and not isinstance(entity, Whitespace)):
- yield entity
- elif (withWhitespace):
+ elif not only_localizable:
yield entity
next_entity_offset = entity.span[1]
entity = self.getNext(ctx, next_entity_offset)
if len(contents) > next_entity_offset:
yield Junk(ctx, (next_entity_offset, len(contents)))
@@ -651,24 +649,24 @@ class FluentEntity(Entity):
class FluentParser(Parser):
capabilities = CAN_SKIP
def __init__(self):
super(FluentParser, self).__init__()
self.ftl_parser = FTLParser()
- def walk(self, onlyEntities=False):
+ def walk(self, only_localizable=False):
if not self.ctx:
# loading file failed, or we just didn't load anything
return
resource = self.ftl_parser.parse(self.ctx.contents)
last_span_end = 0
for entry in resource.body:
- if not onlyEntities:
+ if not only_localizable:
if entry.span.start > last_span_end:
yield Whitespace(
self.ctx, (last_span_end, entry.span.start))
if isinstance(entry, ftl.Message):
yield FluentEntity(self.ctx, entry)
elif isinstance(entry, ftl.Junk):
start = entry.span.start
@@ -677,17 +675,17 @@ class FluentParser(Parser):
start += re.match('\s*', entry.content).end()
# strip trailing whitespace
ws, we = re.search('\s*$', entry.content).span()
end -= we - ws
yield Junk(self.ctx, (start, end))
last_span_end = entry.span.end
- if not onlyEntities:
+ if not only_localizable:
eof_offset = len(self.ctx.contents)
if eof_offset > last_span_end:
yield Whitespace(self.ctx, (last_span_end, eof_offset))
__constructors = [('\\.dtd$', DTDParser()),
('\\.properties$', PropertiesParser()),
('\\.ini$', IniParser()),
--- a/compare_locales/tests/test_defines.py
+++ b/compare_locales/tests/test_defines.py
@@ -27,113 +27,139 @@ class TestDefinesParser(ParserTestMixin,
# If non-English locales wish to credit multiple contributors, uncomment this
# variable definition and use the format specified.
# #define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Joe Solon</em:contributor>
#unfilter emptyLines
''', (
('Comment', mpl2),
+ ('Whitespace', '\n'),
('DefinesInstruction', 'filter emptyLines'),
+ ('Whitespace', '\n\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n\n'),
('Comment', '#define'),
+ ('Whitespace', '\n\n'),
('DefinesInstruction', 'unfilter emptyLines'),
- ('Junk', '\n')))
+ ('Junk', '\n\n')))
def testBrowserWithContributors(self):
self._test(mpl2 + '''
#filter emptyLines
#define MOZ_LANGPACK_CREATOR mozilla.org
# If non-English locales wish to credit multiple contributors, uncomment this
# variable definition and use the format specified.
#define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Joe Solon</em:contributor>
#unfilter emptyLines
''', (
('Comment', mpl2),
+ ('Whitespace', '\n'),
('DefinesInstruction', 'filter emptyLines'),
+ ('Whitespace', '\n\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n\n'),
('Comment', 'non-English'),
+ ('Whitespace', '\n'),
('MOZ_LANGPACK_CONTRIBUTORS',
'<em:contributor>Joe Solon</em:contributor>'),
+ ('Whitespace', '\n\n'),
('DefinesInstruction', 'unfilter emptyLines'),
- ('Junk', '\n')))
+ ('Junk', '\n\n')))
def testCommentWithNonAsciiCharacters(self):
self._test(mpl2 + '''
#filter emptyLines
# e.g. #define seamonkey_l10n <DT><A HREF="urn:foo">SeaMonkey v češtině</a>
#define seamonkey_l10n_long
#unfilter emptyLines
''', (
('Comment', mpl2),
+ ('Whitespace', '\n'),
('DefinesInstruction', 'filter emptyLines'),
+ ('Whitespace', '\n\n'),
('Comment', u'češtině'),
+ ('Whitespace', '\n'),
('seamonkey_l10n_long', ''),
+ ('Whitespace', '\n\n'),
('DefinesInstruction', 'unfilter emptyLines'),
- ('Junk', '\n')))
+ ('Junk', '\n\n')))
def test_no_empty_lines(self):
self._test('''#define MOZ_LANGPACK_CREATOR mozilla.org
#define MOZ_LANGPACK_CREATOR mozilla.org
''', (
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
- ('MOZ_LANGPACK_CREATOR', 'mozilla.org')))
+ ('Whitespace', '\n'),
+ ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n')))
def test_empty_line_between(self):
self._test('''#define MOZ_LANGPACK_CREATOR mozilla.org
#define MOZ_LANGPACK_CREATOR mozilla.org
''', (
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
('Junk', '\n'),
- ('MOZ_LANGPACK_CREATOR', 'mozilla.org')))
+ ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n')))
def test_empty_line_at_the_beginning(self):
self._test('''
#define MOZ_LANGPACK_CREATOR mozilla.org
#define MOZ_LANGPACK_CREATOR mozilla.org
''', (
('Junk', '\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
- ('MOZ_LANGPACK_CREATOR', 'mozilla.org')))
+ ('Whitespace', '\n'),
+ ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n')))
def test_filter_empty_lines(self):
self._test('''#filter emptyLines
#define MOZ_LANGPACK_CREATOR mozilla.org
#define MOZ_LANGPACK_CREATOR mozilla.org
#unfilter emptyLines''', (
('DefinesInstruction', 'filter emptyLines'),
+ ('Whitespace', '\n\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n'),
('DefinesInstruction', 'unfilter emptyLines')))
def test_unfilter_empty_lines_with_trailing(self):
self._test('''#filter emptyLines
#define MOZ_LANGPACK_CREATOR mozilla.org
#define MOZ_LANGPACK_CREATOR mozilla.org
#unfilter emptyLines
''', (
('DefinesInstruction', 'filter emptyLines'),
+ ('Whitespace', '\n\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
+ ('Whitespace', '\n'),
('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
- ('DefinesInstruction', 'unfilter emptyLines')))
+ ('Whitespace', '\n'),
+ ('DefinesInstruction', 'unfilter emptyLines'),
+ ('Whitespace', '\n')))
def testToolkit(self):
self._test('''#define MOZ_LANG_TITLE English (US)
''', (
- ('MOZ_LANG_TITLE', 'English (US)'),))
+ ('MOZ_LANG_TITLE', 'English (US)'),
+ ('Whitespace', '\n')))
def testToolkitEmpty(self):
self._test('', tuple())
def test_empty_file(self):
'''Test that empty files generate errors
defines.inc are interesting that way, as their
--- a/compare_locales/tests/test_dtd.py
+++ b/compare_locales/tests/test_dtd.py
@@ -25,94 +25,104 @@ class TestDTD(ParserTestMixin, unittest.
<!ENTITY good.two "two">
<!ENTITY bad.two "bad "quoted" word">
<!ENTITY good.three "three">
<!ENTITY good.four "good ' quote">
<!ENTITY good.five "good 'quoted' word">
'''
quoteRef = (
('good.one', 'one'),
- ('Junk', '<!ENTITY bad.one "bad " quote">'),
+ ('Whitespace', '\n'),
+ ('Junk', '<!ENTITY bad.one "bad " quote">\n'),
('good.two', 'two'),
- ('Junk', '<!ENTITY bad.two "bad "quoted" word">'),
+ ('Whitespace', '\n'),
+ ('Junk', '<!ENTITY bad.two "bad "quoted" word">\n'),
('good.three', 'three'),
+ ('Whitespace', '\n'),
('good.four', 'good \' quote'),
- ('good.five', 'good \'quoted\' word'),)
+ ('Whitespace', '\n'),
+ ('good.five', 'good \'quoted\' word'),
+ ('Whitespace', '\n'),)
def test_quotes(self):
self._test(self.quoteContent, self.quoteRef)
def test_apos(self):
qr = re.compile('[\'"]', re.M)
def quot2apos(s):
return qr.sub(lambda m: m.group(0) == '"' and "'" or '"', s)
self._test(quot2apos(self.quoteContent),
- map(lambda t: (t[0], quot2apos(t[1])), self.quoteRef))
+ ((ref[0], quot2apos(ref[1])) for ref in self.quoteRef))
def test_parsed_ref(self):
self._test('''<!ENTITY % fooDTD SYSTEM "chrome://brand.dtd">
%fooDTD;
''',
(('fooDTD', '"chrome://brand.dtd"'),))
def test_trailing_comment(self):
self._test('''<!ENTITY first "string">
<!ENTITY second "string">
<!--
<!ENTITY commented "out">
-->
''',
- (('first', 'string'), ('second', 'string'),
- ('Comment', 'out')))
+ (
+ ('first', 'string'),
+ ('Whitespace', '\n'),
+ ('second', 'string'),
+ ('Whitespace', '\n'),
+ ('Comment', 'out'),
+ ('Whitespace', '\n')))
def test_license_header(self):
p = parser.getParser('foo.dtd')
p.readContents(self.resource('triple-license.dtd'))
entities = list(p.walk())
self.assert_(isinstance(entities[0], parser.Comment))
self.assertIn('MPL', entities[0].all)
- e = entities[1]
+ e = entities[2]
self.assert_(isinstance(e, parser.Entity))
self.assertEqual(e.key, 'foo')
self.assertEqual(e.val, 'value')
- self.assertEqual(len(entities), 2)
+ self.assertEqual(len(entities), 4)
p.readContents('''\
<!-- This Source Code Form is subject to the terms of the Mozilla Public
- License, v. 2.0. If a copy of the MPL was not distributed with this file,
- You can obtain one at http://mozilla.org/MPL/2.0/. -->
<!ENTITY foo "value">
''')
entities = list(p.walk())
self.assert_(isinstance(entities[0], parser.Comment))
self.assertIn('MPL', entities[0].all)
- e = entities[1]
+ e = entities[2]
self.assert_(isinstance(e, parser.Entity))
self.assertEqual(e.key, 'foo')
self.assertEqual(e.val, 'value')
- self.assertEqual(len(entities), 2)
+ self.assertEqual(len(entities), 4)
def testBOM(self):
self._test(u'\ufeff<!ENTITY foo.label "stuff">'.encode('utf-8'),
(('foo.label', 'stuff'),))
def test_trailing_whitespace(self):
self._test('<!ENTITY foo.label "stuff">\n \n',
- (('foo.label', 'stuff'),))
+ (('foo.label', 'stuff'), ('Whitespace', '\n \n')))
def test_unicode_comment(self):
self._test('<!-- \xe5\x8f\x96 -->',
(('Comment', u'\u53d6'),))
def test_empty_file(self):
self._test('', tuple())
- self._test('\n', tuple())
- self._test('\n\n', tuple())
- self._test(' \n\n', tuple())
+ self._test('\n', (('Whitespace', '\n'),))
+ self._test('\n\n', (('Whitespace', '\n\n'),))
+ self._test(' \n\n', (('Whitespace', ' \n\n'),))
def test_positions(self):
self.parser.readContents('''\
<!ENTITY one "value">
<!ENTITY two "other
escaped value">
''')
one, two = list(self.parser)
--- a/compare_locales/tests/test_ini.py
+++ b/compare_locales/tests/test_ini.py
@@ -19,127 +19,161 @@ class TestIniParser(ParserTestMixin, uni
filename = 'foo.ini'
def testSimpleHeader(self):
self._test('''; This file is in the UTF-8 encoding
[Strings]
TitleText=Some Title
''', (
('Comment', 'UTF-8 encoding'),
+ ('Whitespace', '\n'),
('IniSection', 'Strings'),
- ('TitleText', 'Some Title'),))
+ ('Whitespace', '\n'),
+ ('TitleText', 'Some Title'),
+ ('Whitespace', '\n')))
def testMPL2_Space_UTF(self):
self._test(mpl2 + '''
; This file is in the UTF-8 encoding
[Strings]
TitleText=Some Title
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('Comment', 'UTF-8'),
+ ('Whitespace', '\n'),
('IniSection', 'Strings'),
- ('TitleText', 'Some Title'),))
+ ('Whitespace', '\n'),
+ ('TitleText', 'Some Title'),
+ ('Whitespace', '\n')))
def testMPL2_Space(self):
self._test(mpl2 + '''
[Strings]
TitleText=Some Title
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('IniSection', 'Strings'),
- ('TitleText', 'Some Title'),))
+ ('Whitespace', '\n'),
+ ('TitleText', 'Some Title'),
+ ('Whitespace', '\n')))
def testMPL2_MultiSpace(self):
self._test(mpl2 + '''
; more comments
[Strings]
TitleText=Some Title
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('Comment', 'more comments'),
+ ('Whitespace', '\n\n'),
('IniSection', 'Strings'),
- ('TitleText', 'Some Title'),))
+ ('Whitespace', '\n'),
+ ('TitleText', 'Some Title'),
+ ('Whitespace', '\n')))
def testMPL2_JunkBeforeCategory(self):
self._test(mpl2 + '''
Junk
[Strings]
TitleText=Some Title
''', (
('Comment', mpl2),
- ('Junk', 'Junk'),
+ ('Whitespace', '\n'),
+ ('Junk', 'Junk\n'),
('IniSection', 'Strings'),
- ('TitleText', 'Some Title')))
+ ('Whitespace', '\n'),
+ ('TitleText', 'Some Title'),
+ ('Whitespace', '\n')))
def test_TrailingComment(self):
self._test(mpl2 + '''
[Strings]
TitleText=Some Title
;Stray trailing comment
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('IniSection', 'Strings'),
+ ('Whitespace', '\n'),
('TitleText', 'Some Title'),
- ('Comment', 'Stray trailing')))
+ ('Whitespace', '\n'),
+ ('Comment', 'Stray trailing'),
+ ('Whitespace', '\n')))
def test_SpacedTrailingComments(self):
self._test(mpl2 + '''
[Strings]
TitleText=Some Title
;Stray trailing comment
;Second stray comment
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('IniSection', 'Strings'),
+ ('Whitespace', '\n'),
('TitleText', 'Some Title'),
- ('Comment', 'Second stray comment')))
+ ('Whitespace', '\n\n'),
+ ('Comment', 'Second stray comment'),
+ ('Whitespace', '\n\n')))
def test_TrailingCommentsAndJunk(self):
self._test(mpl2 + '''
[Strings]
TitleText=Some Title
;Stray trailing comment
Junk
;Second stray comment
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('IniSection', 'Strings'),
+ ('Whitespace', '\n'),
('TitleText', 'Some Title'),
+ ('Whitespace', '\n\n'),
('Comment', 'Stray trailing'),
- ('Junk', 'Junk'),
- ('Comment', 'Second stray comment')))
+ ('Whitespace', '\n'),
+ ('Junk', 'Junk\n'),
+ ('Comment', 'Second stray comment'),
+ ('Whitespace', '\n\n')))
def test_JunkInbetweenEntries(self):
self._test(mpl2 + '''
[Strings]
TitleText=Some Title
Junk
Good=other string
''', (
('Comment', mpl2),
+ ('Whitespace', '\n\n'),
('IniSection', 'Strings'),
+ ('Whitespace', '\n'),
('TitleText', 'Some Title'),
- ('Junk', 'Junk'),
- ('Good', 'other string')))
+ ('Whitespace', '\n\n'),
+ ('Junk', 'Junk\n\n'),
+ ('Good', 'other string'),
+ ('Whitespace', '\n')))
def test_empty_file(self):
self._test('', tuple())
- self._test('\n', tuple())
- self._test('\n\n', tuple())
- self._test(' \n\n', tuple())
+ self._test('\n', (('Whitespace', '\n'),))
+ self._test('\n\n', (('Whitespace', '\n\n'),))
+ self._test(' \n\n', (('Whitespace', ' \n\n'),))
if __name__ == '__main__':
unittest.main()
--- a/compare_locales/tests/test_properties.py
+++ b/compare_locales/tests/test_properties.py
@@ -17,21 +17,25 @@ class TestPropertiesParser(ParserTestMix
two_line = This is the first \
of two lines
one_line_trailing = This line ends in \\
and has junk
two_lines_triple = This line is one of two and ends in \\\
and still has another line coming
''', (
('one_line', 'This is one line'),
+ ('Whitespace', '\n'),
('two_line', u'This is the first of two lines'),
+ ('Whitespace', '\n'),
('one_line_trailing', u'This line ends in \\'),
+ ('Whitespace', '\n'),
('Junk', 'and has junk\n'),
('two_lines_triple', 'This line is one of two and ends in \\'
- 'and still has another line coming')))
+ 'and still has another line coming'),
+ ('Whitespace', '\n')))
def testProperties(self):
# port of netwerk/test/PropertiesTest.cpp
self.parser.readContents(self.resource('test.properties'))
ref = ['1', '2', '3', '4', '5', '6', '7', '8',
'this is the first part of a continued line '
'and here is the 2nd part']
i = iter(self.parser)
@@ -58,17 +62,21 @@ and an end''', (('bar', 'one line with a
def test_license_header(self):
self._test('''\
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
foo=value
-''', (('Comment', 'MPL'), ('foo', 'value')))
+''', (
+ ('Comment', 'MPL'),
+ ('Whitespace', '\n\n'),
+ ('foo', 'value'),
+ ('Whitespace', '\n')))
def test_escapes(self):
self.parser.readContents(r'''
# unicode escapes
zero = some \unicode
one = \u0
two = \u41
three = \u042
@@ -82,60 +90,73 @@ seven = \n\r\t\\
self.assertEqual(e.val, r)
def test_trailing_comment(self):
self._test('''first = string
second = string
#
#commented out
-''', (('first', 'string'), ('second', 'string'),
- ('Comment', 'commented out')))
+''', (
+ ('first', 'string'),
+ ('Whitespace', '\n'),
+ ('second', 'string'),
+ ('Whitespace', '\n\n'),
+ ('Comment', 'commented out'),
+ ('Whitespace', '\n')))
def test_trailing_newlines(self):
self._test('''\
foo = bar
\x20\x20
- ''', (('foo', 'bar'),))
+ ''', (('foo', 'bar'), ('Whitespace', '\n\n\x20\x20\n ')))
def test_just_comments(self):
self._test('''\
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# LOCALIZATION NOTE These strings are used inside the Promise debugger
# which is available as a panel in the Debugger.
-''', (('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+''', (
+ ('Comment', 'MPL'),
+ ('Whitespace', '\n\n'),
+ ('Comment', 'LOCALIZATION NOTE'),
+ ('Whitespace', '\n')))
def test_just_comments_without_trailing_newline(self):
self._test('''\
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# LOCALIZATION NOTE These strings are used inside the Promise debugger
# which is available as a panel in the Debugger.''', (
- ('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE')))
+ ('Comment', 'MPL'),
+ ('Whitespace', '\n\n'),
+ ('Comment', 'LOCALIZATION NOTE')))
def test_trailing_comment_and_newlines(self):
self._test('''\
# LOCALIZATION NOTE These strings are used inside the Promise debugger
# which is available as a panel in the Debugger.
-''', (('Comment', 'LOCALIZATION NOTE'),))
+''', (
+ ('Comment', 'LOCALIZATION NOTE'),
+ ('Whitespace', '\n\n\n')))
def test_empty_file(self):
self._test('', tuple())
- self._test('\n', tuple())
- self._test('\n\n', tuple())
- self._test(' \n\n', tuple())
+ self._test('\n', (('Whitespace', '\n'),))
+ self._test('\n\n', (('Whitespace', '\n\n'),))
+ self._test(' \n\n', (('Whitespace', '\n\n'),))
def test_positions(self):
self.parser.readContents('''\
one = value
two = other \\
escaped value
''')
one, two = list(self.parser)