--- a/compare_locales/checks.py
+++ b/compare_locales/checks.py
@@ -1,24 +1,21 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
+from __future__ import unicode_literals
import re
from collections import Counter
from difflib import SequenceMatcher
from xml import sax
import six
from six.moves import range
from six.moves import zip
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
from fluent.syntax import ast as ftl
from compare_locales.parser import DTDParser, PropertiesEntity, FluentMessage
from compare_locales import plurals
class Checker(object):
@@ -220,17 +217,17 @@ class DTDChecker(Checker):
<!ENTITY key ""> definition to the header.
Also checks for some CSS and number heuristics in the values.
"""
pattern = re.compile('.*\.dtd$')
needs_reference = True # to cast a wider net for known entity references
eref = re.compile('&(%s);' % DTDParser.Name)
- tmpl = '''<!DOCTYPE elem [%s]>
+ tmpl = b'''<!DOCTYPE elem [%s]>
<elem>%s</elem>
'''
xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot'))
def __init__(self, extra_tests, locale=None):
super(DTDChecker, self).__init__(extra_tests, locale=locale)
self.processContent = False
if self.extra_tests is not None and 'android-dtd' in self.extra_tests:
@@ -242,17 +239,17 @@ class DTDChecker(Checker):
self.__known_entities = set()
for ent in self.reference:
self.__known_entities.update(
self.entities_for_value(ent.raw_val))
return self.__known_entities if self.__known_entities is not None \
else self.entities_for_value(refValue)
def entities_for_value(self, value):
- reflist = set(m.group(1).encode('utf-8')
+ reflist = set(m.group(1)
for m in self.eref.finditer(value))
reflist -= self.xmllist
return reflist
# Setup for XML parser, with default and text-only content handler
class TextContent(sax.handler.ContentHandler):
textcontent = ''
@@ -283,44 +280,48 @@ class DTDChecker(Checker):
reflist = self.known_entities(refValue)
inContext = self.entities_for_value(refValue)
entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
parser = sax.make_parser()
parser.setFeature(sax.handler.feature_external_ges, False)
parser.setContentHandler(self.defaulthandler)
try:
- parser.parse(StringIO(self.tmpl %
- (entities, refValue.encode('utf-8'))))
+ parser.parse(
+ six.BytesIO(self.tmpl %
+ (entities.encode('utf-8'),
+ refValue.encode('utf-8'))))
# also catch stray %
- parser.parse(StringIO(self.tmpl %
- (refEnt.all.encode('utf-8') + entities,
- '&%s;' % refEnt.key.encode('utf-8'))))
+ parser.parse(
+ six.BytesIO(self.tmpl %
+ ((refEnt.all + entities).encode('utf-8'),
+ b'&%s;' % refEnt.key.encode('utf-8'))))
except sax.SAXParseException as e:
yield ('warning',
(0, 0),
"can't parse en-US value", 'xmlparse')
# find entities the l10nValue references,
# reusing markup from DTDParser.
l10nlist = self.entities_for_value(l10nValue)
missing = sorted(l10nlist - reflist)
_entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
if self.processContent:
self.texthandler.textcontent = ''
parser.setContentHandler(self.texthandler)
try:
- parser.parse(StringIO(self.tmpl % (_entities,
+ parser.parse(six.BytesIO(self.tmpl % (_entities.encode('utf-8'),
l10nValue.encode('utf-8'))))
# also catch stray %
# if this fails, we need to substract the entity definition
parser.setContentHandler(self.defaulthandler)
- parser.parse(StringIO(self.tmpl % (
- l10nEnt.all.encode('utf-8') + _entities,
- '&%s;' % l10nEnt.key.encode('utf-8'))))
+ parser.parse(
+ six.BytesIO(self.tmpl %
+ ((l10nEnt.all + _entities).encode('utf-8'),
+ b'&%s;' % l10nEnt.key.encode('utf-8'))))
except sax.SAXParseException as e:
# xml parse error, yield error
# sometimes, the error is reported on our fake closing
# element, make that the end of the last line
lnr = e.getLineNumber() - 1
lines = l10nValue.splitlines()
if lnr > len(lines):
lnr = len(lines)
@@ -342,24 +343,24 @@ class DTDChecker(Checker):
', '.join(sorted(inContext))
if elsewhere:
warntmpl += ', %s known)' % ', '.join(sorted(elsewhere))
else:
warntmpl += ')'
else:
warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
for key in missing:
- yield ('warning', (0, 0), warntmpl % key.decode('utf-8'),
+ yield ('warning', (0, 0), warntmpl % key,
'xmlparse')
if inContext and l10nlist and l10nlist - inContext - set(missing):
mismatch = sorted(l10nlist - inContext - set(missing))
for key in mismatch:
yield ('warning', (0, 0),
'Entity %s referenced, but %s used in context' % (
- key.decode('utf-8'),
+ key,
', '.join(sorted(inContext))
), 'xmlparse')
# Number check
if self.num.match(refValue) and not self.num.match(l10nValue):
yield ('warning', 0, 'reference is a number', 'number')
# CSS checks
# just a length, width="100em"
@@ -444,22 +445,22 @@ class DTDChecker(Checker):
q = "[\"']"
offset = -1
stray_quot = re.compile(r"[\\\\]*(%s)" % q)
for m in stray_quot.finditer(val):
if len(m.group(0)) % 2:
# found an unescaped single or double quote, which message?
if m.group(1) == '"':
- msg = u"Quotes in Android DTDs need escaping with \\\" "\
- u"or \\u0022, or put string in apostrophes."
+ msg = "Quotes in Android DTDs need escaping with \\\" "\
+ "or \\u0022, or put string in apostrophes."
else:
- msg = u"Apostrophes in Android DTDs need escaping with "\
- u"\\' or \\u0027, or use \u2019, or put string in "\
- u"quotes."
+ msg = "Apostrophes in Android DTDs need escaping with "\
+ "\\' or \\u0027, or use \u2019, or put string in "\
+ "quotes."
yield ('error', m.end(0)+offset, msg, 'android')
class FluentChecker(Checker):
'''Tests to run on Fluent (FTL) files.
'''
pattern = re.compile('.*\.ftl')
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -1,13 +1,14 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
+from __future__ import unicode_literals
import re
import bisect
import codecs
from collections import Counter
import logging
try:
from html import unescape as html_unescape
@@ -106,18 +107,18 @@ class EntityBase(object):
re_br = re.compile('<br\s*/?>', re.U)
re_sgml = re.compile('</?\w+.*?>', re.U | re.M)
def count_words(self):
"""Count the words in an English string.
Replace a couple of xml markup to make that safer, too.
"""
- value = self.re_br.sub(u'\n', self.val)
- value = self.re_sgml.sub(u'', value)
+ value = self.re_br.sub('\n', self.val)
+ value = self.re_sgml.sub('', value)
return len(value.split())
def equals(self, other):
return self.key == other.key and self.val == other.val
class Entity(EntityBase):
pass
@@ -239,17 +240,17 @@ class Parser(object):
def __init__(self):
if not hasattr(self, 'encoding'):
self.encoding = 'utf-8'
self.ctx = None
self.last_comment = None
def readFile(self, file):
- with open(file, 'rU') as f:
+ with open(file, 'rbU') as f:
try:
self.readContents(f.read())
except UnicodeDecodeError as e:
(logging.getLogger('locales')
.error("Can't read file: " + file + '; ' + str(e)))
def readContents(self, contents):
'''Read contents and create parsing context.
@@ -359,32 +360,32 @@ class DTDEntity(Entity):
class DTDParser(Parser):
# http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
# ":" | [A-Z] | "_" | [a-z] |
# [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
# | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
# [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
# [#x10000-#xEFFFF]
- CharMinusDash = u'\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
+ CharMinusDash = '\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
XmlComment = '<!--(?:-?[%s])*?-->' % CharMinusDash
- NameStartChar = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
- u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
- u'\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
+ NameStartChar = ':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
+ '\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
+ '\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
# + \U00010000-\U000EFFFF seems to be unsupported in python
# NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
# [#x0300-#x036F] | [#x203F-#x2040]
- NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
+ NameChar = NameStartChar + r'\-\.0-9' + '\xB7\u0300-\u036F\u203F-\u2040'
Name = '[' + NameStartChar + '][' + NameChar + ']*'
reKey = re.compile('<!ENTITY\s+(?P<key>' + Name + ')\s+'
'(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>',
re.DOTALL | re.M)
# add BOM to DTDs, details in bug 435002
- reHeader = re.compile(u'^\ufeff')
+ reHeader = re.compile('^\ufeff')
reComment = re.compile('<!--(?P<val>-?[%s])*?-->' % CharMinusDash,
re.S)
rePE = re.compile(u'<!ENTITY\s+%\s+(?P<key>' + Name + ')\s+'
u'SYSTEM\s+(?P<val>\"[^\"]*\"|\'[^\']*\')\s*>\s*'
u'%' + Name + ';'
u'(?:[ \t]*(?:' + XmlComment + u'\s*)*\n?)?')
class Comment(Comment):
--- a/compare_locales/tests/__init__.py
+++ b/compare_locales/tests/__init__.py
@@ -27,25 +27,25 @@ class ParserTestMixin():
def tearDown(self):
'tear down this test'
del self.parser
def resource(self, name):
testcontent = resource_string(__name__, 'data/' + name)
# fake universal line endings
- testcontent = re.sub('\r\n?', lambda m: '\n', testcontent)
+ testcontent = re.sub(b'\r\n?', lambda m: b'\n', testcontent)
return testcontent
- def _test(self, content, refs):
+ def _test(self, unicode_content, refs):
'''Helper to test the parser.
Compares the result of parsing content with the given list
of reference keys and values.
'''
- self.parser.readContents(content)
+ self.parser.readContents(unicode_content.encode(self.parser.encoding))
entities = list(self.parser.walk())
for entity, ref in zip_longest(entities, refs):
self.assertTrue(entity,
'excess reference entity ' + six.text_type(ref))
self.assertTrue(ref,
'excess parsed entity ' + six.text_type(entity))
if isinstance(entity, parser.Entity):
self.assertEqual(entity.key, ref[0])
--- a/compare_locales/tests/test_checks.py
+++ b/compare_locales/tests/test_checks.py
@@ -1,19 +1,21 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
+from __future__ import unicode_literals
import unittest
from compare_locales.checks import getChecker
from compare_locales.parser import getParser, Parser, DTDEntity
from compare_locales.paths import File
+import six
from six.moves import range
class BaseHelper(unittest.TestCase):
file = None
refContent = None
def setUp(self):
@@ -32,244 +34,247 @@ class BaseHelper(unittest.TestCase):
checker.set_reference(self.refList)
ref = self.refList[self.refMap[l10n.key]]
found = tuple(checker.check(ref, l10n))
self.assertEqual(found, refWarnOrErrors)
class TestProperties(BaseHelper):
file = File('foo.properties', 'foo.properties')
- refContent = '''some = value
+ refContent = b'''some = value
'''
def testGood(self):
- self._test('''some = localized''',
+ self._test(b'''some = localized''',
tuple())
def testMissedEscape(self):
- self._test(r'''some = \u67ood escape, bad \escape''',
+ self._test(br'''some = \u67ood escape, bad \escape''',
(('warning', 20, r'unknown escape sequence, \e',
'escape'),))
class TestPlurals(BaseHelper):
file = File('foo.properties', 'foo.properties')
- refContent = '''\
+ refContent = b'''\
# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
# #1 number of files
# example: 111 files - Downloads
downloadsTitleFiles=#1 file - Downloads;#1 files - #2
'''
def testGood(self):
- self._test('''\
+ self._test(b'''\
# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
# #1 number of files
# example: 111 files - Downloads
downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 filers
''',
tuple())
def testNotUsed(self):
- self._test('''\
+ self._test(b'''\
# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
# #1 number of files
# example: 111 files - Downloads
downloadsTitleFiles=#1 file - Downloads;#1 files - Downloads;#1 filers
''',
(('warning', 0, 'not all variables used in l10n',
'plural'),))
def testNotDefined(self):
- self._test('''\
+ self._test(b'''\
# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
# #1 number of files
# example: 111 files - Downloads
downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 #3
''',
(('error', 0, 'unreplaced variables in l10n', 'plural'),))
class TestPluralForms(BaseHelper):
file = File('foo.properties', 'foo.properties', locale='en-GB')
- refContent = '''\
+ refContent = b'''\
# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
# #1 number of files
# example: 111 files - Downloads
downloadsTitleFiles=#1 file;#1 files
'''
def test_matching_forms(self):
- self._test('''\
+ self._test(b'''\
downloadsTitleFiles=#1 fiiilee;#1 fiiilees
''',
tuple())
def test_lacking_forms(self):
- self._test('''\
+ self._test(b'''\
downloadsTitleFiles=#1 fiiilee
''',
(('warning', 0, 'expecting 2 plurals, found 1', 'plural'),))
def test_excess_forms(self):
- self._test('''\
+ self._test(b'''\
downloadsTitleFiles=#1 fiiilee;#1 fiiilees;#1 fiiilees
''',
(('warning', 0, 'expecting 2 plurals, found 3', 'plural'),))
class TestDTDs(BaseHelper):
file = File('foo.dtd', 'foo.dtd')
- refContent = '''<!ENTITY foo "This is 'good'">
+ refContent = b'''<!ENTITY foo "This is 'good'">
<!ENTITY width "10ch">
<!ENTITY style "width: 20ch; height: 280px;">
<!ENTITY minStyle "min-height: 50em;">
<!ENTITY ftd "0">
<!ENTITY formatPercent "This is 100% correct">
<!ENTITY some.key "K">
'''
def testWarning(self):
- self._test('''<!ENTITY foo "This is ¬ good">
+ self._test(b'''<!ENTITY foo "This is ¬ good">
''',
(('warning', (0, 0), 'Referencing unknown entity `not`',
'xmlparse'),))
# make sure we only handle translated entity references
- self._test(u'''<!ENTITY foo "This is &ƞǿŧ; good">
+ self._test('''<!ENTITY foo "This is &ƞǿŧ; good">
'''.encode('utf-8'),
- (('warning', (0, 0), u'Referencing unknown entity `ƞǿŧ`',
+ (('warning', (0, 0), 'Referencing unknown entity `ƞǿŧ`',
'xmlparse'),))
def testErrorFirstLine(self):
- self._test('''<!ENTITY foo "This is </bad> stuff">
+ self._test(b'''<!ENTITY foo "This is </bad> stuff">
''',
(('error', (1, 10), 'mismatched tag', 'xmlparse'),))
def testErrorSecondLine(self):
- self._test('''<!ENTITY foo "This is
+ self._test(b'''<!ENTITY foo "This is
</bad>
stuff">
''',
(('error', (2, 4), 'mismatched tag', 'xmlparse'),))
def testKeyErrorSingleAmpersand(self):
- self._test('''<!ENTITY some.key "&">
+ self._test(b'''<!ENTITY some.key "&">
''',
(('error', (1, 1), 'not well-formed (invalid token)',
'xmlparse'),))
def testXMLEntity(self):
- self._test('''<!ENTITY foo "This is "good"">
+ self._test(b'''<!ENTITY foo "This is "good"">
''',
tuple())
def testPercentEntity(self):
- self._test('''<!ENTITY formatPercent "Another 100%">
+ self._test(b'''<!ENTITY formatPercent "Another 100%">
''',
tuple())
- self._test('''<!ENTITY formatPercent "Bad 100% should fail">
+ self._test(b'''<!ENTITY formatPercent "Bad 100% should fail">
''',
(('error', (0, 32), 'not well-formed (invalid token)',
'xmlparse'),))
def testNoNumber(self):
- self._test('''<!ENTITY ftd "foo">''',
+ self._test(b'''<!ENTITY ftd "foo">''',
(('warning', 0, 'reference is a number', 'number'),))
def testNoLength(self):
- self._test('''<!ENTITY width "15miles">''',
+ self._test(b'''<!ENTITY width "15miles">''',
(('error', 0, 'reference is a CSS length', 'css'),))
def testNoStyle(self):
- self._test('''<!ENTITY style "15ch">''',
+ self._test(b'''<!ENTITY style "15ch">''',
(('error', 0, 'reference is a CSS spec', 'css'),))
- self._test('''<!ENTITY style "junk">''',
+ self._test(b'''<!ENTITY style "junk">''',
(('error', 0, 'reference is a CSS spec', 'css'),))
def testStyleWarnings(self):
- self._test('''<!ENTITY style "width:15ch">''',
+ self._test(b'''<!ENTITY style "width:15ch">''',
(('warning', 0, 'height only in reference', 'css'),))
- self._test('''<!ENTITY style "width:15em;height:200px;">''',
+ self._test(b'''<!ENTITY style "width:15em;height:200px;">''',
(('warning', 0, "units for width don't match (em != ch)",
'css'),))
def testNoWarning(self):
- self._test('''<!ENTITY width "12em">''', tuple())
- self._test('''<!ENTITY style "width:12ch;height:200px;">''', tuple())
- self._test('''<!ENTITY ftd "0">''', tuple())
+ self._test(b'''<!ENTITY width "12em">''', tuple())
+ self._test(b'''<!ENTITY style "width:12ch;height:200px;">''', tuple())
+ self._test(b'''<!ENTITY ftd "0">''', tuple())
class TestEntitiesInDTDs(BaseHelper):
file = File('foo.dtd', 'foo.dtd')
- refContent = '''<!ENTITY short "This is &brandShortName;">
+ refContent = b'''<!ENTITY short "This is &brandShortName;">
<!ENTITY shorter "This is &brandShorterName;">
<!ENTITY ent.start "Using &brandShorterName; start to">
<!ENTITY ent.end " end">
'''
def testOK(self):
- self._test('''<!ENTITY ent.start "Mit &brandShorterName;">''', tuple())
+ self._test(b'''<!ENTITY ent.start "Mit &brandShorterName;">''',
+ tuple())
def testMismatch(self):
- self._test('''<!ENTITY ent.start "Mit &brandShortName;">''',
+ self._test(b'''<!ENTITY ent.start "Mit &brandShortName;">''',
(('warning', (0, 0),
'Entity brandShortName referenced, '
'but brandShorterName used in context',
'xmlparse'),))
def testAcross(self):
- self._test('''<!ENTITY ent.end "Mit &brandShorterName;">''',
+ self._test(b'''<!ENTITY ent.end "Mit &brandShorterName;">''',
tuple())
def testAcrossWithMismatch(self):
'''If we could tell that ent.start and ent.end are one string,
we should warn. Sadly, we can't, so this goes without warning.'''
- self._test('''<!ENTITY ent.end "Mit &brandShortName;">''',
+ self._test(b'''<!ENTITY ent.end "Mit &brandShortName;">''',
tuple())
def testUnknownWithRef(self):
- self._test('''<!ENTITY ent.start "Mit &foopy;">''',
+ self._test(b'''<!ENTITY ent.start "Mit &foopy;">''',
(('warning',
(0, 0),
'Referencing unknown entity `foopy` '
'(brandShorterName used in context, '
'brandShortName known)',
'xmlparse'),))
def testUnknown(self):
- self._test('''<!ENTITY ent.end "Mit &foopy;">''',
+ self._test(b'''<!ENTITY ent.end "Mit &foopy;">''',
(('warning',
(0, 0),
'Referencing unknown entity `foopy`'
' (brandShortName, brandShorterName known)',
'xmlparse'),))
class TestAndroid(unittest.TestCase):
"""Test Android checker
Make sure we're hitting our extra rules only if
we're passing in a DTD file in the embedding/android module.
"""
- apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \
- u"\\u0027, or use \u2019, or put string in quotes."
- quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \
- u"\\u0022, or put string in apostrophes."
+ apos_msg = "Apostrophes in Android DTDs need escaping with \\' or " + \
+ "\\u0027, or use \u2019, or put string in quotes."
+ quot_msg = "Quotes in Android DTDs need escaping with \\\" or " + \
+ "\\u0022, or put string in apostrophes."
def getNext(self, v):
ctx = Parser.Context(v)
return DTDEntity(
ctx, '', (0, len(v)), (), (0, len(v)))
def getDTDEntity(self, v):
+ if isinstance(v, six.binary_type):
+ v = v.decode('utf-8')
v = v.replace('"', '"')
ctx = Parser.Context('<!ENTITY foo "%s">' % v)
return DTDEntity(
ctx, '', (0, len(v) + 16), (9, 12), (14, len(v) + 14))
def test_android_dtd(self):
"""Testing the actual android checks. The logic is involved,
so this is a lot of nitty gritty detail tests.
@@ -334,22 +339,22 @@ class TestAndroid(unittest.TestCase):
l10n = self.getDTDEntity("''\"'")
self.assertEqual(tuple(checker.check(ref, l10n)),
(('error', 1, self.apos_msg, 'android'),))
l10n = self.getDTDEntity('"\'""')
self.assertEqual(tuple(checker.check(ref, l10n)),
(('error', 2, self.quot_msg, 'android'),))
# broken unicode escape
- l10n = self.getDTDEntity("Some broken \u098 unicode")
+ l10n = self.getDTDEntity(b"Some broken \u098 unicode")
self.assertEqual(tuple(checker.check(ref, l10n)),
(('error', 12, 'truncated \\uXXXX escape',
'android'),))
# broken unicode escape, try to set the error off
- l10n = self.getDTDEntity(u"\u9690"*14+"\u006"+" "+"\u0064")
+ l10n = self.getDTDEntity("\u9690"*14+"\\u006"+" "+"\\u0064")
self.assertEqual(tuple(checker.check(ref, l10n)),
(('error', 14, 'truncated \\uXXXX escape',
'android'),))
def test_android_prop(self):
f = File("embedding/android/strings.properties", "strings.properties",
"embedding/android")
checker = getChecker(f, extra_tests=['android-dtd'])
@@ -393,17 +398,17 @@ class TestAndroid(unittest.TestCase):
ref = self.getDTDEntity("plain string")
l10n = self.getDTDEntity("plain localized string with apos: '")
self.assertEqual(tuple(checker.check(ref, l10n)),
())
def test_entities_across_dtd(self):
f = File("browser/strings.dtd", "strings.dtd", "browser")
p = getParser(f.file)
- p.readContents('<!ENTITY other "some &good.ref;">')
+ p.readContents(b'<!ENTITY other "some &good.ref;">')
ref = p.parse()
checker = getChecker(f)
checker.set_reference(ref[0])
# good string
ref = self.getDTDEntity("plain string")
l10n = self.getDTDEntity("plain localized string")
self.assertEqual(tuple(checker.check(ref, l10n)),
())
--- a/compare_locales/tests/test_defines.py
+++ b/compare_locales/tests/test_defines.py
@@ -1,14 +1,15 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
+from __future__ import unicode_literals
import unittest
from compare_locales.tests import ParserTestMixin
from compare_locales.parser import (
Comment,
DefinesInstruction,
Junk,
Whitespace,
--- a/compare_locales/tests/test_dtd.py
+++ b/compare_locales/tests/test_dtd.py
@@ -1,16 +1,17 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
'''Tests for the DTD parser.
'''
from __future__ import absolute_import
+from __future__ import unicode_literals
import unittest
import re
from compare_locales import parser
from compare_locales.parser import (
Comment,
Junk,
Whitespace,
@@ -87,79 +88,79 @@ class TestDTD(ParserTestMixin, unittest.
entities = list(p.walk())
self.assertIsInstance(entities[0], parser.Comment)
self.assertIn('MPL', entities[0].all)
e = entities[2]
self.assert_(isinstance(e, parser.Entity))
self.assertEqual(e.key, 'foo')
self.assertEqual(e.val, 'value')
self.assertEqual(len(entities), 4)
- p.readContents('''\
+ p.readContents(b'''\
<!-- This Source Code Form is subject to the terms of the Mozilla Public
- License, v. 2.0. If a copy of the MPL was not distributed with this file,
- You can obtain one at http://mozilla.org/MPL/2.0/. -->
<!ENTITY foo "value">
''')
entities = list(p.walk())
self.assert_(isinstance(entities[0], parser.Comment))
self.assertIn('MPL', entities[0].all)
e = entities[2]
self.assert_(isinstance(e, parser.Entity))
self.assertEqual(e.key, 'foo')
self.assertEqual(e.val, 'value')
self.assertEqual(len(entities), 4)
def testBOM(self):
- self._test(u'\ufeff<!ENTITY foo.label "stuff">'.encode('utf-8'),
+ self._test(u'\ufeff<!ENTITY foo.label "stuff">',
(('foo.label', 'stuff'),))
def test_trailing_whitespace(self):
self._test('<!ENTITY foo.label "stuff">\n \n',
(('foo.label', 'stuff'), (Whitespace, '\n \n')))
def test_unicode_comment(self):
- self._test('<!-- \xe5\x8f\x96 -->',
+ self._test(b'<!-- \xe5\x8f\x96 -->'.decode('utf-8'),
((Comment, u'\u53d6'),))
def test_empty_file(self):
self._test('', tuple())
self._test('\n', ((Whitespace, '\n'),))
self._test('\n\n', ((Whitespace, '\n\n'),))
self._test(' \n\n', ((Whitespace, ' \n\n'),))
def test_positions(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
<!ENTITY one "value">
<!ENTITY two "other
escaped value">
''')
one, two = list(self.parser)
self.assertEqual(one.position(), (1, 1))
self.assertEqual(one.value_position(), (1, 16))
self.assertEqual(one.position(-1), (1, 23))
self.assertEqual(two.position(), (2, 1))
self.assertEqual(two.value_position(), (2, 16))
self.assertEqual(two.value_position(-1), (3, 14))
self.assertEqual(two.value_position(10), (3, 5))
def test_word_count(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
<!ENTITY a "one">
<!ENTITY b "one<br>two">
<!ENTITY c "one<span>word</span>">
<!ENTITY d "one <a href='foo'>two</a> three">
''')
a, b, c, d = list(self.parser)
self.assertEqual(a.count_words(), 1)
self.assertEqual(b.count_words(), 2)
self.assertEqual(c.count_words(), 1)
self.assertEqual(d.count_words(), 3)
def test_html_entities(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
<!ENTITY named "&">
<!ENTITY numcode "&">
<!ENTITY shorthexcode "&">
<!ENTITY longhexcode "&">
<!ENTITY unknown "&unknownEntity;">
''')
entities = iter(self.parser)
@@ -179,17 +180,17 @@ escaped value">
self.assertEqual(entity.raw_val, '&')
self.assertEqual(entity.val, '&')
entity = next(entities)
self.assertEqual(entity.raw_val, '&unknownEntity;')
self.assertEqual(entity.val, '&unknownEntity;')
def test_comment_val(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
<!-- comment
spanning lines --> <!--
-->
<!-- last line -->
''')
entities = self.parser.walk()
entity = next(entities)
--- a/compare_locales/tests/test_ftl.py
+++ b/compare_locales/tests/test_ftl.py
@@ -10,40 +10,40 @@ from compare_locales import parser
from compare_locales.tests import ParserTestMixin
class TestFluentParser(ParserTestMixin, unittest.TestCase):
maxDiff = None
filename = 'foo.ftl'
def test_equality_same(self):
- source = 'progress = Progress: { NUMBER($num, style: "percent") }.'
+ source = b'progress = Progress: { NUMBER($num, style: "percent") }.'
self.parser.readContents(source)
[ent1] = list(self.parser)
self.parser.readContents(source)
[ent2] = list(self.parser)
self.assertTrue(ent1.equals(ent2))
def test_equality_different_whitespace(self):
- source1 = 'foo = { $arg }'
- source2 = 'foo = { $arg }'
+ source1 = b'foo = { $arg }'
+ source2 = b'foo = { $arg }'
self.parser.readContents(source1)
[ent1] = list(self.parser)
self.parser.readContents(source2)
[ent2] = list(self.parser)
self.assertTrue(ent1.equals(ent2))
def test_word_count(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
a = One
b = One two three
c = One { $arg } two
d =
One { $arg ->
*[x] Two three
[y] Four
} five.
@@ -72,75 +72,75 @@ h =
self.assertEqual(c.count_words(), 2)
self.assertEqual(d.count_words(), 5)
self.assertEqual(e.count_words(), 1)
self.assertEqual(f.count_words(), 2)
self.assertEqual(g.count_words(), 3)
self.assertEqual(h.count_words(), 10)
def test_simple_message(self):
- self.parser.readContents('a = A')
+ self.parser.readContents(b'a = A')
[a] = list(self.parser)
self.assertEqual(a.key, 'a')
self.assertEqual(a.val, 'A')
self.assertEqual(a.all, 'a = A')
attributes = list(a.attributes)
self.assertEqual(len(attributes), 0)
def test_complex_message(self):
- self.parser.readContents('abc = A { $arg } B { msg } C')
+ self.parser.readContents(b'abc = A { $arg } B { msg } C')
[abc] = list(self.parser)
self.assertEqual(abc.key, 'abc')
self.assertEqual(abc.val, 'A { $arg } B { msg } C')
self.assertEqual(abc.all, 'abc = A { $arg } B { msg } C')
def test_multiline_message(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
abc =
A
B
C
''')
[abc] = list(self.parser)
self.assertEqual(abc.key, 'abc')
self.assertEqual(abc.val, 'A\n B\n C')
self.assertEqual(abc.all, 'abc =\n A\n B\n C')
def test_message_with_attribute(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
abc = ABC
.attr = Attr
''')
[abc] = list(self.parser)
self.assertEqual(abc.key, 'abc')
self.assertEqual(abc.val, 'ABC')
self.assertEqual(abc.all, 'abc = ABC\n .attr = Attr')
def test_message_with_attribute_and_no_value(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
abc
.attr = Attr
''')
[abc] = list(self.parser)
self.assertEqual(abc.key, 'abc')
self.assertEqual(abc.val, None)
self.assertEqual(abc.all, 'abc\n .attr = Attr')
attributes = list(abc.attributes)
self.assertEqual(len(attributes), 1)
attr = attributes[0]
self.assertEqual(attr.key, 'attr')
self.assertEqual(attr.val, 'Attr')
def test_non_localizable(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
### Resource Comment
foo = Foo
## Group Comment
-bar = Bar
@@ -206,17 +206,17 @@ baz = Baz
self.assertEqual(entity.val, 'Baz')
self.assertEqual(entity.entry.comment.content, 'Baz Comment')
entity = next(entities)
self.assertTrue(isinstance(entity, parser.Whitespace))
self.assertEqual(entity.all, '\n')
def test_non_localizable_syntax_zero_four(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
// Resource Comment
foo = Foo
// Section Comment
[[ Section Header ]]
bar = Bar
@@ -286,17 +286,17 @@ baz = Baz
self.assertEqual(entity.val, 'Baz')
self.assertEqual(entity.entry.comment.content, 'Baz Comment')
entity = next(entities)
self.assertTrue(isinstance(entity, parser.Whitespace))
self.assertEqual(entity.all, '\n')
def test_comments_val(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
// Legacy Comment
### Resource Comment
## Section Comment
# Standalone Comment
''')
--- a/compare_locales/tests/test_ini.py
+++ b/compare_locales/tests/test_ini.py
@@ -1,14 +1,15 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import
+from __future__ import unicode_literals
import unittest
from compare_locales.tests import ParserTestMixin
from compare_locales.parser import (
Comment,
IniSection,
Junk,
Whitespace,
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -14,21 +14,23 @@ from compare_locales.compare import Cont
from compare_locales import mozpath
class ContentMixin(object):
extension = None # OVERLOAD
def reference(self, content):
self.ref = mozpath.join(self.tmp, "en-reference" + self.extension)
- open(self.ref, "w").write(content)
+ with open(self.ref, "w") as f:
+ f.write(content)
def localized(self, content):
self.l10n = mozpath.join(self.tmp, "l10n" + self.extension)
- open(self.l10n, "w").write(content)
+ with open(self.l10n, "w") as f:
+ f.write(content)
class TestProperties(unittest.TestCase, ContentMixin):
extension = '.properties'
def setUp(self):
self.maxDiff = None
self.tmp = mkdtemp()
--- a/compare_locales/tests/test_properties.py
+++ b/compare_locales/tests/test_properties.py
@@ -76,17 +76,17 @@ and an end''', (('bar', 'one line with a
foo=value
''', (
(Comment, 'MPL'),
(Whitespace, '\n\n'),
('foo', 'value'),
(Whitespace, '\n')))
def test_escapes(self):
- self.parser.readContents(r'''
+ self.parser.readContents(br'''
# unicode escapes
zero = some \unicode
one = \u0
two = \u41
three = \u042
four = \u0043
five = \u0044a
six = \a
@@ -156,32 +156,32 @@ foo = bar
def test_empty_file(self):
self._test('', tuple())
self._test('\n', ((Whitespace, '\n'),))
self._test('\n\n', ((Whitespace, '\n\n'),))
self._test(' \n\n', ((Whitespace, '\n\n'),))
def test_positions(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
one = value
two = other \\
escaped value
''')
one, two = list(self.parser)
self.assertEqual(one.position(), (1, 1))
self.assertEqual(one.value_position(), (1, 7))
self.assertEqual(two.position(), (2, 1))
self.assertEqual(two.value_position(), (2, 7))
self.assertEqual(two.value_position(-1), (3, 14))
self.assertEqual(two.value_position(10), (3, 3))
# Bug 1399059 comment 18
def test_z(self):
- self.parser.readContents('''\
+ self.parser.readContents(b'''\
one = XYZ ABC
''')
one, = list(self.parser)
self.assertEqual(one.val, 'XYZ ABC')
if __name__ == '__main__':
unittest.main()