bug 1388313, loads of string, unicode, byte fixes, r=emin draft
author Axel Hecht <axel@pike.org>
Thu, 17 Aug 2017 12:18:07 +0200
changeset 476 90d248e778ab14467f5411cb401caf0071279820
parent 471 26a6679b33e9f5c3a7c44360a2d6121a14049d2c
child 477 3e15728a75beda5b1a5b29a2919f147a321ae63c
push id 160
push user axel@mozilla.com
push date Mon, 05 Mar 2018 14:24:27 +0000
reviewers emin
bugs 1388313
bug 1388313, loads of string, unicode, byte fixes, r=emin MozReview-Commit-ID: JYGZ7g71i2h
compare_locales/checks.py
compare_locales/parser.py
compare_locales/tests/__init__.py
compare_locales/tests/test_checks.py
compare_locales/tests/test_defines.py
compare_locales/tests/test_dtd.py
compare_locales/tests/test_ftl.py
compare_locales/tests/test_ini.py
compare_locales/tests/test_merge.py
compare_locales/tests/test_properties.py
--- a/compare_locales/checks.py
+++ b/compare_locales/checks.py
@@ -1,24 +1,21 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
+from __future__ import unicode_literals
 import re
 from collections import Counter
 from difflib import SequenceMatcher
 from xml import sax
 import six
 from six.moves import range
 from six.moves import zip
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
 
 from fluent.syntax import ast as ftl
 
 from compare_locales.parser import DTDParser, PropertiesEntity, FluentMessage
 from compare_locales import plurals
 
 
 class Checker(object):
@@ -220,17 +217,17 @@ class DTDChecker(Checker):
     <!ENTITY key ""> definition to the header.
 
     Also checks for some CSS and number heuristics in the values.
     """
     pattern = re.compile('.*\.dtd$')
     needs_reference = True  # to cast a wider net for known entity references
 
     eref = re.compile('&(%s);' % DTDParser.Name)
-    tmpl = '''<!DOCTYPE elem [%s]>
+    tmpl = b'''<!DOCTYPE elem [%s]>
 <elem>%s</elem>
 '''
     xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot'))
 
     def __init__(self, extra_tests, locale=None):
         super(DTDChecker, self).__init__(extra_tests, locale=locale)
         self.processContent = False
         if self.extra_tests is not None and 'android-dtd' in self.extra_tests:
@@ -242,17 +239,17 @@ class DTDChecker(Checker):
             self.__known_entities = set()
             for ent in self.reference:
                 self.__known_entities.update(
                     self.entities_for_value(ent.raw_val))
         return self.__known_entities if self.__known_entities is not None \
             else self.entities_for_value(refValue)
 
     def entities_for_value(self, value):
-        reflist = set(m.group(1).encode('utf-8')
+        reflist = set(m.group(1)
                       for m in self.eref.finditer(value))
         reflist -= self.xmllist
         return reflist
 
     # Setup for XML parser, with default and text-only content handler
     class TextContent(sax.handler.ContentHandler):
         textcontent = ''
 
@@ -283,44 +280,48 @@ class DTDChecker(Checker):
         reflist = self.known_entities(refValue)
         inContext = self.entities_for_value(refValue)
         entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
         parser = sax.make_parser()
         parser.setFeature(sax.handler.feature_external_ges, False)
 
         parser.setContentHandler(self.defaulthandler)
         try:
-            parser.parse(StringIO(self.tmpl %
-                                  (entities, refValue.encode('utf-8'))))
+            parser.parse(
+                six.BytesIO(self.tmpl %
+                            (entities.encode('utf-8'),
+                             refValue.encode('utf-8'))))
             # also catch stray %
-            parser.parse(StringIO(self.tmpl %
-                                  (refEnt.all.encode('utf-8') + entities,
-                                   '&%s;' % refEnt.key.encode('utf-8'))))
+            parser.parse(
+                six.BytesIO(self.tmpl %
+                            ((refEnt.all + entities).encode('utf-8'),
+                             b'&%s;' % refEnt.key.encode('utf-8'))))
         except sax.SAXParseException as e:
             yield ('warning',
                    (0, 0),
                    "can't parse en-US value", 'xmlparse')
 
         # find entities the l10nValue references,
         # reusing markup from DTDParser.
         l10nlist = self.entities_for_value(l10nValue)
         missing = sorted(l10nlist - reflist)
         _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
         if self.processContent:
             self.texthandler.textcontent = ''
             parser.setContentHandler(self.texthandler)
         try:
-            parser.parse(StringIO(self.tmpl % (_entities,
+            parser.parse(six.BytesIO(self.tmpl % (_entities.encode('utf-8'),
                          l10nValue.encode('utf-8'))))
             # also catch stray %
             # if this fails, we need to substract the entity definition
             parser.setContentHandler(self.defaulthandler)
-            parser.parse(StringIO(self.tmpl % (
-                l10nEnt.all.encode('utf-8') + _entities,
-                '&%s;' % l10nEnt.key.encode('utf-8'))))
+            parser.parse(
+                six.BytesIO(self.tmpl %
+                            ((l10nEnt.all + _entities).encode('utf-8'),
+                             b'&%s;' % l10nEnt.key.encode('utf-8'))))
         except sax.SAXParseException as e:
             # xml parse error, yield error
             # sometimes, the error is reported on our fake closing
             # element, make that the end of the last line
             lnr = e.getLineNumber() - 1
             lines = l10nValue.splitlines()
             if lnr > len(lines):
                 lnr = len(lines)
@@ -342,24 +343,24 @@ class DTDChecker(Checker):
                     ', '.join(sorted(inContext))
                 if elsewhere:
                     warntmpl += ', %s known)' % ', '.join(sorted(elsewhere))
                 else:
                     warntmpl += ')'
             else:
                 warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
         for key in missing:
-            yield ('warning', (0, 0), warntmpl % key.decode('utf-8'),
+            yield ('warning', (0, 0), warntmpl % key,
                    'xmlparse')
         if inContext and l10nlist and l10nlist - inContext - set(missing):
             mismatch = sorted(l10nlist - inContext - set(missing))
             for key in mismatch:
                 yield ('warning', (0, 0),
                        'Entity %s referenced, but %s used in context' % (
-                           key.decode('utf-8'),
+                           key,
                            ', '.join(sorted(inContext))
                 ), 'xmlparse')
 
         # Number check
         if self.num.match(refValue) and not self.num.match(l10nValue):
             yield ('warning', 0, 'reference is a number', 'number')
         # CSS checks
         # just a length, width="100em"
@@ -444,22 +445,22 @@ class DTDChecker(Checker):
             q = "[\"']"
             offset = -1
         stray_quot = re.compile(r"[\\\\]*(%s)" % q)
 
         for m in stray_quot.finditer(val):
             if len(m.group(0)) % 2:
                 # found an unescaped single or double quote, which message?
                 if m.group(1) == '"':
-                    msg = u"Quotes in Android DTDs need escaping with \\\" "\
-                          u"or \\u0022, or put string in apostrophes."
+                    msg = "Quotes in Android DTDs need escaping with \\\" "\
+                          "or \\u0022, or put string in apostrophes."
                 else:
-                    msg = u"Apostrophes in Android DTDs need escaping with "\
-                          u"\\' or \\u0027, or use \u2019, or put string in "\
-                          u"quotes."
+                    msg = "Apostrophes in Android DTDs need escaping with "\
+                          "\\' or \\u0027, or use \u2019, or put string in "\
+                          "quotes."
                 yield ('error', m.end(0)+offset, msg, 'android')
 
 
 class FluentChecker(Checker):
     '''Tests to run on Fluent (FTL) files.
     '''
     pattern = re.compile('.*\.ftl')
 
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -1,13 +1,14 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
+from __future__ import unicode_literals
 import re
 import bisect
 import codecs
 from collections import Counter
 import logging
 
 try:
     from html import unescape as html_unescape
@@ -106,18 +107,18 @@ class EntityBase(object):
 
     re_br = re.compile('<br\s*/?>', re.U)
     re_sgml = re.compile('</?\w+.*?>', re.U | re.M)
 
     def count_words(self):
         """Count the words in an English string.
         Replace a couple of xml markup to make that safer, too.
         """
-        value = self.re_br.sub(u'\n', self.val)
-        value = self.re_sgml.sub(u'', value)
+        value = self.re_br.sub('\n', self.val)
+        value = self.re_sgml.sub('', value)
         return len(value.split())
 
     def equals(self, other):
         return self.key == other.key and self.val == other.val
 
 
 class Entity(EntityBase):
     pass
@@ -239,17 +240,17 @@ class Parser(object):
 
     def __init__(self):
         if not hasattr(self, 'encoding'):
             self.encoding = 'utf-8'
         self.ctx = None
         self.last_comment = None
 
     def readFile(self, file):
-        with open(file, 'rU') as f:
+        with open(file, 'rbU') as f:
             try:
                 self.readContents(f.read())
             except UnicodeDecodeError as e:
                 (logging.getLogger('locales')
                         .error("Can't read file: " + file + '; ' + str(e)))
 
     def readContents(self, contents):
         '''Read contents and create parsing context.
@@ -359,32 +360,32 @@ class DTDEntity(Entity):
 
 class DTDParser(Parser):
     # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
     # ":" | [A-Z] | "_" | [a-z] |
     # [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
     # | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
     # [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
     # [#x10000-#xEFFFF]
-    CharMinusDash = u'\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
+    CharMinusDash = '\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
     XmlComment = '<!--(?:-?[%s])*?-->' % CharMinusDash
-    NameStartChar = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
-        u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
-        u'\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
+    NameStartChar = ':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
+        '\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
+        '\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
     # + \U00010000-\U000EFFFF seems to be unsupported in python
 
     # NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
     #     [#x0300-#x036F] | [#x203F-#x2040]
-    NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
+    NameChar = NameStartChar + r'\-\.0-9' + '\xB7\u0300-\u036F\u203F-\u2040'
     Name = '[' + NameStartChar + '][' + NameChar + ']*'
     reKey = re.compile('<!ENTITY\s+(?P<key>' + Name + ')\s+'
                        '(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>',
                        re.DOTALL | re.M)
     # add BOM to DTDs, details in bug 435002
-    reHeader = re.compile(u'^\ufeff')
+    reHeader = re.compile('^\ufeff')
     reComment = re.compile('<!--(?P<val>-?[%s])*?-->' % CharMinusDash,
                            re.S)
     rePE = re.compile(u'<!ENTITY\s+%\s+(?P<key>' + Name + ')\s+'
                       u'SYSTEM\s+(?P<val>\"[^\"]*\"|\'[^\']*\')\s*>\s*'
                       u'%' + Name + ';'
                       u'(?:[ \t]*(?:' + XmlComment + u'\s*)*\n?)?')
 
     class Comment(Comment):
--- a/compare_locales/tests/__init__.py
+++ b/compare_locales/tests/__init__.py
@@ -27,25 +27,25 @@ class ParserTestMixin():
 
     def tearDown(self):
         'tear down this test'
         del self.parser
 
     def resource(self, name):
         testcontent = resource_string(__name__, 'data/' + name)
         # fake universal line endings
-        testcontent = re.sub('\r\n?', lambda m: '\n', testcontent)
+        testcontent = re.sub(b'\r\n?', lambda m: b'\n', testcontent)
         return testcontent
 
-    def _test(self, content, refs):
+    def _test(self, unicode_content, refs):
         '''Helper to test the parser.
         Compares the result of parsing content with the given list
         of reference keys and values.
         '''
-        self.parser.readContents(content)
+        self.parser.readContents(unicode_content.encode(self.parser.encoding))
         entities = list(self.parser.walk())
         for entity, ref in zip_longest(entities, refs):
             self.assertTrue(entity,
                             'excess reference entity ' + six.text_type(ref))
             self.assertTrue(ref,
                             'excess parsed entity ' + six.text_type(entity))
             if isinstance(entity, parser.Entity):
                 self.assertEqual(entity.key, ref[0])
--- a/compare_locales/tests/test_checks.py
+++ b/compare_locales/tests/test_checks.py
@@ -1,19 +1,21 @@
 # -*- coding: utf-8 -*-
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
+from __future__ import unicode_literals
 import unittest
 
 from compare_locales.checks import getChecker
 from compare_locales.parser import getParser, Parser, DTDEntity
 from compare_locales.paths import File
+import six
 from six.moves import range
 
 
 class BaseHelper(unittest.TestCase):
     file = None
     refContent = None
 
     def setUp(self):
@@ -32,244 +34,247 @@ class BaseHelper(unittest.TestCase):
             checker.set_reference(self.refList)
         ref = self.refList[self.refMap[l10n.key]]
         found = tuple(checker.check(ref, l10n))
         self.assertEqual(found, refWarnOrErrors)
 
 
 class TestProperties(BaseHelper):
     file = File('foo.properties', 'foo.properties')
-    refContent = '''some = value
+    refContent = b'''some = value
 '''
 
     def testGood(self):
-        self._test('''some = localized''',
+        self._test(b'''some = localized''',
                    tuple())
 
     def testMissedEscape(self):
-        self._test(r'''some = \u67ood escape, bad \escape''',
+        self._test(br'''some = \u67ood escape, bad \escape''',
                    (('warning', 20, r'unknown escape sequence, \e',
                      'escape'),))
 
 
 class TestPlurals(BaseHelper):
     file = File('foo.properties', 'foo.properties')
-    refContent = '''\
+    refContent = b'''\
 # LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
 # See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
 # #1 number of files
 # example: 111 files - Downloads
 downloadsTitleFiles=#1 file - Downloads;#1 files - #2
 '''
 
     def testGood(self):
-        self._test('''\
+        self._test(b'''\
 # LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
 # See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
 # #1 number of files
 # example: 111 files - Downloads
 downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 filers
 ''',
                    tuple())
 
     def testNotUsed(self):
-        self._test('''\
+        self._test(b'''\
 # LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
 # See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
 # #1 number of files
 # example: 111 files - Downloads
 downloadsTitleFiles=#1 file - Downloads;#1 files - Downloads;#1 filers
 ''',
                    (('warning', 0, 'not all variables used in l10n',
                      'plural'),))
 
     def testNotDefined(self):
-        self._test('''\
+        self._test(b'''\
 # LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
 # See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
 # #1 number of files
 # example: 111 files - Downloads
 downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 #3
 ''',
                    (('error', 0, 'unreplaced variables in l10n', 'plural'),))
 
 
 class TestPluralForms(BaseHelper):
     file = File('foo.properties', 'foo.properties', locale='en-GB')
-    refContent = '''\
+    refContent = b'''\
 # LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
 # See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
 # #1 number of files
 # example: 111 files - Downloads
 downloadsTitleFiles=#1 file;#1 files
 '''
 
     def test_matching_forms(self):
-        self._test('''\
+        self._test(b'''\
 downloadsTitleFiles=#1 fiiilee;#1 fiiilees
 ''',
                    tuple())
 
     def test_lacking_forms(self):
-        self._test('''\
+        self._test(b'''\
 downloadsTitleFiles=#1 fiiilee
 ''',
                    (('warning', 0, 'expecting 2 plurals, found 1', 'plural'),))
 
     def test_excess_forms(self):
-        self._test('''\
+        self._test(b'''\
 downloadsTitleFiles=#1 fiiilee;#1 fiiilees;#1 fiiilees
 ''',
                    (('warning', 0, 'expecting 2 plurals, found 3', 'plural'),))
 
 
 class TestDTDs(BaseHelper):
     file = File('foo.dtd', 'foo.dtd')
-    refContent = '''<!ENTITY foo "This is &apos;good&apos;">
+    refContent = b'''<!ENTITY foo "This is &apos;good&apos;">
 <!ENTITY width "10ch">
 <!ENTITY style "width: 20ch; height: 280px;">
 <!ENTITY minStyle "min-height: 50em;">
 <!ENTITY ftd "0">
 <!ENTITY formatPercent "This is 100&#037; correct">
 <!ENTITY some.key "K">
 '''
 
     def testWarning(self):
-        self._test('''<!ENTITY foo "This is &not; good">
+        self._test(b'''<!ENTITY foo "This is &not; good">
 ''',
                    (('warning', (0, 0), 'Referencing unknown entity `not`',
                      'xmlparse'),))
         # make sure we only handle translated entity references
-        self._test(u'''<!ENTITY foo "This is &ƞǿŧ; good">
+        self._test('''<!ENTITY foo "This is &ƞǿŧ; good">
 '''.encode('utf-8'),
-            (('warning', (0, 0), u'Referencing unknown entity `ƞǿŧ`',
+            (('warning', (0, 0), 'Referencing unknown entity `ƞǿŧ`',
               'xmlparse'),))
 
     def testErrorFirstLine(self):
-        self._test('''<!ENTITY foo "This is </bad> stuff">
+        self._test(b'''<!ENTITY foo "This is </bad> stuff">
 ''',
                    (('error', (1, 10), 'mismatched tag', 'xmlparse'),))
 
     def testErrorSecondLine(self):
-        self._test('''<!ENTITY foo "This is
+        self._test(b'''<!ENTITY foo "This is
   </bad>
 stuff">
 ''',
                    (('error', (2, 4), 'mismatched tag', 'xmlparse'),))
 
     def testKeyErrorSingleAmpersand(self):
-        self._test('''<!ENTITY some.key "&">
+        self._test(b'''<!ENTITY some.key "&">
 ''',
                    (('error', (1, 1), 'not well-formed (invalid token)',
                      'xmlparse'),))
 
     def testXMLEntity(self):
-        self._test('''<!ENTITY foo "This is &quot;good&quot;">
+        self._test(b'''<!ENTITY foo "This is &quot;good&quot;">
 ''',
                    tuple())
 
     def testPercentEntity(self):
-        self._test('''<!ENTITY formatPercent "Another 100&#037;">
+        self._test(b'''<!ENTITY formatPercent "Another 100&#037;">
 ''',
                    tuple())
-        self._test('''<!ENTITY formatPercent "Bad 100% should fail">
+        self._test(b'''<!ENTITY formatPercent "Bad 100% should fail">
 ''',
                    (('error', (0, 32), 'not well-formed (invalid token)',
                      'xmlparse'),))
 
     def testNoNumber(self):
-        self._test('''<!ENTITY ftd "foo">''',
+        self._test(b'''<!ENTITY ftd "foo">''',
                    (('warning', 0, 'reference is a number', 'number'),))
 
     def testNoLength(self):
-        self._test('''<!ENTITY width "15miles">''',
+        self._test(b'''<!ENTITY width "15miles">''',
                    (('error', 0, 'reference is a CSS length', 'css'),))
 
     def testNoStyle(self):
-        self._test('''<!ENTITY style "15ch">''',
+        self._test(b'''<!ENTITY style "15ch">''',
                    (('error', 0, 'reference is a CSS spec', 'css'),))
-        self._test('''<!ENTITY style "junk">''',
+        self._test(b'''<!ENTITY style "junk">''',
                    (('error', 0, 'reference is a CSS spec', 'css'),))
 
     def testStyleWarnings(self):
-        self._test('''<!ENTITY style "width:15ch">''',
+        self._test(b'''<!ENTITY style "width:15ch">''',
                    (('warning', 0, 'height only in reference', 'css'),))
-        self._test('''<!ENTITY style "width:15em;height:200px;">''',
+        self._test(b'''<!ENTITY style "width:15em;height:200px;">''',
                    (('warning', 0, "units for width don't match (em != ch)",
                      'css'),))
 
     def testNoWarning(self):
-        self._test('''<!ENTITY width "12em">''', tuple())
-        self._test('''<!ENTITY style "width:12ch;height:200px;">''', tuple())
-        self._test('''<!ENTITY ftd "0">''', tuple())
+        self._test(b'''<!ENTITY width "12em">''', tuple())
+        self._test(b'''<!ENTITY style "width:12ch;height:200px;">''', tuple())
+        self._test(b'''<!ENTITY ftd "0">''', tuple())
 
 
 class TestEntitiesInDTDs(BaseHelper):
     file = File('foo.dtd', 'foo.dtd')
-    refContent = '''<!ENTITY short "This is &brandShortName;">
+    refContent = b'''<!ENTITY short "This is &brandShortName;">
 <!ENTITY shorter "This is &brandShorterName;">
 <!ENTITY ent.start "Using &brandShorterName; start to">
 <!ENTITY ent.end " end">
 '''
 
     def testOK(self):
-        self._test('''<!ENTITY ent.start "Mit &brandShorterName;">''', tuple())
+        self._test(b'''<!ENTITY ent.start "Mit &brandShorterName;">''',
+                   tuple())
 
     def testMismatch(self):
-        self._test('''<!ENTITY ent.start "Mit &brandShortName;">''',
+        self._test(b'''<!ENTITY ent.start "Mit &brandShortName;">''',
                    (('warning', (0, 0),
                      'Entity brandShortName referenced, '
                      'but brandShorterName used in context',
                      'xmlparse'),))
 
     def testAcross(self):
-        self._test('''<!ENTITY ent.end "Mit &brandShorterName;">''',
+        self._test(b'''<!ENTITY ent.end "Mit &brandShorterName;">''',
                    tuple())
 
     def testAcrossWithMismatch(self):
         '''If we could tell that ent.start and ent.end are one string,
         we should warn. Sadly, we can't, so this goes without warning.'''
-        self._test('''<!ENTITY ent.end "Mit &brandShortName;">''',
+        self._test(b'''<!ENTITY ent.end "Mit &brandShortName;">''',
                    tuple())
 
     def testUnknownWithRef(self):
-        self._test('''<!ENTITY ent.start "Mit &foopy;">''',
+        self._test(b'''<!ENTITY ent.start "Mit &foopy;">''',
                    (('warning',
                      (0, 0),
                      'Referencing unknown entity `foopy` '
                      '(brandShorterName used in context, '
                      'brandShortName known)',
                      'xmlparse'),))
 
     def testUnknown(self):
-        self._test('''<!ENTITY ent.end "Mit &foopy;">''',
+        self._test(b'''<!ENTITY ent.end "Mit &foopy;">''',
                    (('warning',
                      (0, 0),
                      'Referencing unknown entity `foopy`'
                      ' (brandShortName, brandShorterName known)',
                      'xmlparse'),))
 
 
 class TestAndroid(unittest.TestCase):
     """Test Android checker
 
     Make sure we're hitting our extra rules only if
     we're passing in a DTD file in the embedding/android module.
     """
-    apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \
-               u"\\u0027, or use \u2019, or put string in quotes."
-    quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \
-               u"\\u0022, or put string in apostrophes."
+    apos_msg = "Apostrophes in Android DTDs need escaping with \\' or " + \
+               "\\u0027, or use \u2019, or put string in quotes."
+    quot_msg = "Quotes in Android DTDs need escaping with \\\" or " + \
+               "\\u0022, or put string in apostrophes."
 
     def getNext(self, v):
         ctx = Parser.Context(v)
         return DTDEntity(
             ctx, '', (0, len(v)), (), (0, len(v)))
 
     def getDTDEntity(self, v):
+        if isinstance(v, six.binary_type):
+            v = v.decode('utf-8')
         v = v.replace('"', '&quot;')
         ctx = Parser.Context('<!ENTITY foo "%s">' % v)
         return DTDEntity(
             ctx, '', (0, len(v) + 16), (9, 12), (14, len(v) + 14))
 
     def test_android_dtd(self):
         """Testing the actual android checks. The logic is involved,
         so this is a lot of nitty gritty detail tests.
@@ -334,22 +339,22 @@ class TestAndroid(unittest.TestCase):
         l10n = self.getDTDEntity("''\"'")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          (('error', 1, self.apos_msg, 'android'),))
         l10n = self.getDTDEntity('"\'""')
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          (('error', 2, self.quot_msg, 'android'),))
 
         # broken unicode escape
-        l10n = self.getDTDEntity("Some broken \u098 unicode")
+        l10n = self.getDTDEntity(b"Some broken \u098 unicode")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          (('error', 12, 'truncated \\uXXXX escape',
                            'android'),))
         # broken unicode escape, try to set the error off
-        l10n = self.getDTDEntity(u"\u9690"*14+"\u006"+"  "+"\u0064")
+        l10n = self.getDTDEntity("\u9690"*14+"\\u006"+"  "+"\\u0064")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          (('error', 14, 'truncated \\uXXXX escape',
                            'android'),))
 
     def test_android_prop(self):
         f = File("embedding/android/strings.properties", "strings.properties",
                  "embedding/android")
         checker = getChecker(f, extra_tests=['android-dtd'])
@@ -393,17 +398,17 @@ class TestAndroid(unittest.TestCase):
         ref = self.getDTDEntity("plain string")
         l10n = self.getDTDEntity("plain localized string with apos: '")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          ())
 
     def test_entities_across_dtd(self):
         f = File("browser/strings.dtd", "strings.dtd", "browser")
         p = getParser(f.file)
-        p.readContents('<!ENTITY other "some &good.ref;">')
+        p.readContents(b'<!ENTITY other "some &good.ref;">')
         ref = p.parse()
         checker = getChecker(f)
         checker.set_reference(ref[0])
         # good string
         ref = self.getDTDEntity("plain string")
         l10n = self.getDTDEntity("plain localized string")
         self.assertEqual(tuple(checker.check(ref, l10n)),
                          ())
--- a/compare_locales/tests/test_defines.py
+++ b/compare_locales/tests/test_defines.py
@@ -1,14 +1,15 @@
 # -*- coding: utf-8 -*-
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
+from __future__ import unicode_literals
 import unittest
 
 from compare_locales.tests import ParserTestMixin
 from compare_locales.parser import (
     Comment,
     DefinesInstruction,
     Junk,
     Whitespace,
--- a/compare_locales/tests/test_dtd.py
+++ b/compare_locales/tests/test_dtd.py
@@ -1,16 +1,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 '''Tests for the DTD parser.
 '''
 
 from __future__ import absolute_import
+from __future__ import unicode_literals
 import unittest
 import re
 
 from compare_locales import parser
 from compare_locales.parser import (
     Comment,
     Junk,
     Whitespace,
@@ -87,79 +88,79 @@ class TestDTD(ParserTestMixin, unittest.
         entities = list(p.walk())
         self.assertIsInstance(entities[0], parser.Comment)
         self.assertIn('MPL', entities[0].all)
         e = entities[2]
         self.assert_(isinstance(e, parser.Entity))
         self.assertEqual(e.key, 'foo')
         self.assertEqual(e.val, 'value')
         self.assertEqual(len(entities), 4)
-        p.readContents('''\
+        p.readContents(b'''\
 <!-- This Source Code Form is subject to the terms of the Mozilla Public
    - License, v. 2.0. If a copy of the MPL was not distributed with this file,
    - You can obtain one at http://mozilla.org/MPL/2.0/.  -->
 <!ENTITY foo "value">
 ''')
         entities = list(p.walk())
         self.assert_(isinstance(entities[0], parser.Comment))
         self.assertIn('MPL', entities[0].all)
         e = entities[2]
         self.assert_(isinstance(e, parser.Entity))
         self.assertEqual(e.key, 'foo')
         self.assertEqual(e.val, 'value')
         self.assertEqual(len(entities), 4)
 
     def testBOM(self):
-        self._test(u'\ufeff<!ENTITY foo.label "stuff">'.encode('utf-8'),
+        self._test(u'\ufeff<!ENTITY foo.label "stuff">',
                    (('foo.label', 'stuff'),))
 
     def test_trailing_whitespace(self):
         self._test('<!ENTITY foo.label "stuff">\n  \n',
                    (('foo.label', 'stuff'), (Whitespace, '\n  \n')))
 
     def test_unicode_comment(self):
-        self._test('<!-- \xe5\x8f\x96 -->',
+        self._test(b'<!-- \xe5\x8f\x96 -->'.decode('utf-8'),
                    ((Comment, u'\u53d6'),))
 
     def test_empty_file(self):
         self._test('', tuple())
         self._test('\n', ((Whitespace, '\n'),))
         self._test('\n\n', ((Whitespace, '\n\n'),))
         self._test(' \n\n', ((Whitespace, ' \n\n'),))
 
     def test_positions(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 <!ENTITY one  "value">
 <!ENTITY  two "other
 escaped value">
 ''')
         one, two = list(self.parser)
         self.assertEqual(one.position(), (1, 1))
         self.assertEqual(one.value_position(), (1, 16))
         self.assertEqual(one.position(-1), (1, 23))
         self.assertEqual(two.position(), (2, 1))
         self.assertEqual(two.value_position(), (2, 16))
         self.assertEqual(two.value_position(-1), (3, 14))
         self.assertEqual(two.value_position(10), (3, 5))
 
     def test_word_count(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 <!ENTITY a "one">
 <!ENTITY b "one<br>two">
 <!ENTITY c "one<span>word</span>">
 <!ENTITY d "one <a href='foo'>two</a> three">
 ''')
         a, b, c, d = list(self.parser)
         self.assertEqual(a.count_words(), 1)
         self.assertEqual(b.count_words(), 2)
         self.assertEqual(c.count_words(), 1)
         self.assertEqual(d.count_words(), 3)
 
     def test_html_entities(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 <!ENTITY named "&amp;">
 <!ENTITY numcode "&#38;">
 <!ENTITY shorthexcode "&#x26;">
 <!ENTITY longhexcode "&#x0026;">
 <!ENTITY unknown "&unknownEntity;">
 ''')
         entities = iter(self.parser)
 
@@ -179,17 +180,17 @@ escaped value">
         self.assertEqual(entity.raw_val, '&#x0026;')
         self.assertEqual(entity.val, '&')
 
         entity = next(entities)
         self.assertEqual(entity.raw_val, '&unknownEntity;')
         self.assertEqual(entity.val, '&unknownEntity;')
 
     def test_comment_val(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 <!-- comment
 spanning lines -->  <!--
 -->
 <!-- last line -->
 ''')
         entities = self.parser.walk()
 
         entity = next(entities)
--- a/compare_locales/tests/test_ftl.py
+++ b/compare_locales/tests/test_ftl.py
@@ -10,40 +10,40 @@ from compare_locales import parser
 from compare_locales.tests import ParserTestMixin
 
 
 class TestFluentParser(ParserTestMixin, unittest.TestCase):
     maxDiff = None
     filename = 'foo.ftl'
 
     def test_equality_same(self):
-        source = 'progress = Progress: { NUMBER($num, style: "percent") }.'
+        source = b'progress = Progress: { NUMBER($num, style: "percent") }.'
 
         self.parser.readContents(source)
         [ent1] = list(self.parser)
 
         self.parser.readContents(source)
         [ent2] = list(self.parser)
 
         self.assertTrue(ent1.equals(ent2))
 
     def test_equality_different_whitespace(self):
-        source1 = 'foo = { $arg }'
-        source2 = 'foo = {    $arg    }'
+        source1 = b'foo = { $arg }'
+        source2 = b'foo = {    $arg    }'
 
         self.parser.readContents(source1)
         [ent1] = list(self.parser)
 
         self.parser.readContents(source2)
         [ent2] = list(self.parser)
 
         self.assertTrue(ent1.equals(ent2))
 
     def test_word_count(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 a = One
 b = One two three
 c = One { $arg } two
 d =
     One { $arg ->
        *[x] Two three
         [y] Four
     } five.
@@ -72,75 +72,75 @@ h =
         self.assertEqual(c.count_words(), 2)
         self.assertEqual(d.count_words(), 5)
         self.assertEqual(e.count_words(), 1)
         self.assertEqual(f.count_words(), 2)
         self.assertEqual(g.count_words(), 3)
         self.assertEqual(h.count_words(), 10)
 
     def test_simple_message(self):
-        self.parser.readContents('a = A')
+        self.parser.readContents(b'a = A')
 
         [a] = list(self.parser)
         self.assertEqual(a.key, 'a')
         self.assertEqual(a.val, 'A')
         self.assertEqual(a.all, 'a = A')
         attributes = list(a.attributes)
         self.assertEqual(len(attributes), 0)
 
     def test_complex_message(self):
-        self.parser.readContents('abc = A { $arg } B { msg } C')
+        self.parser.readContents(b'abc = A { $arg } B { msg } C')
 
         [abc] = list(self.parser)
         self.assertEqual(abc.key, 'abc')
         self.assertEqual(abc.val, 'A { $arg } B { msg } C')
         self.assertEqual(abc.all, 'abc = A { $arg } B { msg } C')
 
     def test_multiline_message(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 abc =
     A
     B
     C
 ''')
 
         [abc] = list(self.parser)
         self.assertEqual(abc.key, 'abc')
         self.assertEqual(abc.val, 'A\n    B\n    C')
         self.assertEqual(abc.all, 'abc =\n    A\n    B\n    C')
 
     def test_message_with_attribute(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 abc = ABC
     .attr = Attr
 ''')
 
         [abc] = list(self.parser)
         self.assertEqual(abc.key, 'abc')
         self.assertEqual(abc.val, 'ABC')
         self.assertEqual(abc.all, 'abc = ABC\n    .attr = Attr')
 
     def test_message_with_attribute_and_no_value(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 abc
     .attr = Attr
 ''')
 
         [abc] = list(self.parser)
         self.assertEqual(abc.key, 'abc')
         self.assertEqual(abc.val, None)
         self.assertEqual(abc.all, 'abc\n    .attr = Attr')
         attributes = list(abc.attributes)
         self.assertEqual(len(attributes), 1)
         attr = attributes[0]
         self.assertEqual(attr.key, 'attr')
         self.assertEqual(attr.val, 'Attr')
 
     def test_non_localizable(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 ### Resource Comment
 
 foo = Foo
 
 ## Group Comment
 
 -bar = Bar
 
@@ -206,17 +206,17 @@ baz = Baz
         self.assertEqual(entity.val, 'Baz')
         self.assertEqual(entity.entry.comment.content, 'Baz Comment')
 
         entity = next(entities)
         self.assertTrue(isinstance(entity, parser.Whitespace))
         self.assertEqual(entity.all, '\n')
 
     def test_non_localizable_syntax_zero_four(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 // Resource Comment
 
 foo = Foo
 
 // Section Comment
 [[ Section Header ]]
 
 bar = Bar
@@ -286,17 +286,17 @@ baz = Baz
         self.assertEqual(entity.val, 'Baz')
         self.assertEqual(entity.entry.comment.content, 'Baz Comment')
 
         entity = next(entities)
         self.assertTrue(isinstance(entity, parser.Whitespace))
         self.assertEqual(entity.all, '\n')
 
     def test_comments_val(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 // Legacy Comment
 
 ### Resource Comment
 
 ## Section Comment
 
 # Standalone Comment
 ''')
--- a/compare_locales/tests/test_ini.py
+++ b/compare_locales/tests/test_ini.py
@@ -1,14 +1,15 @@
 # -*- coding: utf-8 -*-
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
+from __future__ import unicode_literals
 import unittest
 
 from compare_locales.tests import ParserTestMixin
 from compare_locales.parser import (
     Comment,
     IniSection,
     Junk,
     Whitespace,
--- a/compare_locales/tests/test_merge.py
+++ b/compare_locales/tests/test_merge.py
@@ -14,21 +14,23 @@ from compare_locales.compare import Cont
 from compare_locales import mozpath
 
 
 class ContentMixin(object):
     extension = None  # OVERLOAD
 
     def reference(self, content):
         self.ref = mozpath.join(self.tmp, "en-reference" + self.extension)
-        open(self.ref, "w").write(content)
+        with open(self.ref, "w") as f:
+            f.write(content)
 
     def localized(self, content):
         self.l10n = mozpath.join(self.tmp, "l10n" + self.extension)
-        open(self.l10n, "w").write(content)
+        with open(self.l10n, "w") as f:
+            f.write(content)
 
 
 class TestProperties(unittest.TestCase, ContentMixin):
     extension = '.properties'
 
     def setUp(self):
         self.maxDiff = None
         self.tmp = mkdtemp()
--- a/compare_locales/tests/test_properties.py
+++ b/compare_locales/tests/test_properties.py
@@ -76,17 +76,17 @@ and an end''', (('bar', 'one line with a
 foo=value
 ''', (
             (Comment, 'MPL'),
             (Whitespace, '\n\n'),
             ('foo', 'value'),
             (Whitespace, '\n')))
 
     def test_escapes(self):
-        self.parser.readContents(r'''
+        self.parser.readContents(br'''
 # unicode escapes
 zero = some \unicode
 one = \u0
 two = \u41
 three = \u042
 four = \u0043
 five = \u0044a
 six = \a
@@ -156,32 +156,32 @@ foo = bar
 
     def test_empty_file(self):
         self._test('', tuple())
         self._test('\n', ((Whitespace, '\n'),))
         self._test('\n\n', ((Whitespace, '\n\n'),))
         self._test(' \n\n', ((Whitespace, '\n\n'),))
 
     def test_positions(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 one = value
 two = other \\
 escaped value
 ''')
         one, two = list(self.parser)
         self.assertEqual(one.position(), (1, 1))
         self.assertEqual(one.value_position(), (1, 7))
         self.assertEqual(two.position(), (2, 1))
         self.assertEqual(two.value_position(), (2, 7))
         self.assertEqual(two.value_position(-1), (3, 14))
         self.assertEqual(two.value_position(10), (3, 3))
 
     # Bug 1399059 comment 18
     def test_z(self):
-        self.parser.readContents('''\
+        self.parser.readContents(b'''\
 one = XYZ ABC
 ''')
         one, = list(self.parser)
         self.assertEqual(one.val, 'XYZ ABC')
 
 
 if __name__ == '__main__':
     unittest.main()