bug 1310980, part 6: drop old header and footer, add back BOMs to DTDs draft
author Axel Hecht <axel@pike.org>
Fri, 21 Oct 2016 18:20:52 +0200
changeset 147 72ff392e5c8d2bf696869b40eb26d4186b34baff
parent 146 ce58f9cd9893c0f60643deacc6b19461433cc51f
child 148 9f2bf640183abd1b678afc566d11f458fc5c3fd2
child 149 e6f765f205272f7569cb02cbe726020424414568
push id 30
push user axel@mozilla.com
push date Mon, 24 Oct 2016 14:52:08 +0000
bugs 1310980
bug 1310980, part 6: drop old header and footer, add back BOMs to DTDs. Also add a test for BOMs in DTD files, because they were broken in this patch queue. MozReview-Commit-ID: CS57IANoFzl
compare_locales/parser.py
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -225,24 +225,16 @@ class Parser:
             if (not onlyEntities or
                     type(entity) is Entity or
                     type(entity) is Junk):
                 yield entity
             entity, offset = self.getEntity(ctx, offset)
         if len(contents) > offset:
             yield Junk(ctx, (offset, len(contents)))
 
-    def getHeader(self, contents, offset):
-        header = ''
-        h = self.reHeader.match(contents)
-        if h:
-            header = h.group()
-            offset = h.end()
-        return (header, offset)
-
     def getEntity(self, ctx, offset):
         m = self.reKey.match(ctx.contents, offset)
         if m:
             offset = m.end()
             entity = self.createEntity(ctx, m)
             return (entity, offset)
         m = self.reComment.match(ctx.contents, offset)
         if m:
@@ -309,32 +301,34 @@ class DTDParser(Parser):
     #     [#x0300-#x036F] | [#x203F-#x2040]
     NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
     Name = '[' + NameStartChar + '][' + NameChar + ']*'
     reKey = re.compile('(?:(?P<pre>\s*)(?P<entity><!ENTITY\s+(?P<key>' + Name +
                        ')\s+(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>)'
                        '(?P<post>\s*)?)',
                        re.DOTALL | re.M)
     # add BOM to DTDs, details in bug 435002
-    reHeader = re.compile(u'^\ufeff?')
+    reHeader = re.compile(u'^\ufeff')
     reComment = re.compile('(\s*)(<!--(-?[%s])*?-->)(\s*)' % CharMinusDash,
                            re.S)
     rePE = re.compile(u'(?:(\s*)'
                       u'(<!ENTITY\s+%\s+(' + Name +
                       u')\s+SYSTEM\s+(\"[^\"]*\"|\'[^\']*\')\s*>\s*%' + Name +
                       u';)([ \t]*(?:' + XmlComment + u'\s*)*\n?)?)')
 
     def getEntity(self, ctx, offset):
         '''
         Overload Parser.getEntity to special-case ParsedEntities.
         Just check for a parsed entity if that method claims junk.
 
         <!ENTITY % foo SYSTEM "url">
         %foo;
         '''
+        if offset is 0 and self.reHeader.match(ctx.contents):
+            offset += 1
         entity, inneroffset = Parser.getEntity(self, ctx, offset)
         if (entity and isinstance(entity, Junk)) or entity is None:
             m = self.rePE.match(ctx.contents, offset)
             if m:
                 inneroffset = m.end()
                 self.last_comment = ''
                 entity = Entity(ctx, self.postProcessValue, '',
                                 *[m.span(i) for i in xrange(6)])
@@ -355,33 +349,21 @@ class DTDParser(Parser):
 class PropertiesParser(Parser):
     escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                         '(?P<nl>\n\s*)|(?P<single>.))', re.M)
     known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
 
     def __init__(self):
         self.reKey = re.compile('^(\s*)'
                                 '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
-        self.reHeader = re.compile('^\s*([#!].*\s*)+')
         self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
         self._escapedEnd = re.compile(r'\\+$')
         self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
         Parser.__init__(self)
 
-    def getHeader(self, contents, offset):
-        header = ''
-        h = self.reHeader.match(contents, offset)
-        if h:
-            candidate = h.group()
-            if 'http://mozilla.org/MPL/2.0/' in candidate or \
-                    'LICENSE BLOCK' in candidate:
-                header = candidate
-                offset = h.end()
-        return (header, offset)
-
     def getEntity(self, ctx, offset):
         # overwritten to parse values line by line
         contents = ctx.contents
         m = self.reComment.match(contents, offset)
         if m:
             spans = [m.span(i) for i in xrange(3)]
             start_trailing = offset = m.end()
             while offset < len(contents):
@@ -480,18 +462,16 @@ class DefinesParser(Parser):
         self.reKey = re.compile('((?:[ \t]*\n)*)'
                                 '(#define[ \t]+(\w+)[ \t]+(.*?)(?:\n|\Z))'
                                 '((?:[ \t]*(?:\n|\Z))*)',
                                 re.M)
         self.rePI = re.compile('((?:[ \t]*\n)*)'
                                '(#(\w+)[ \t]+(.*?)(?:\n|\Z))'
                                '((?:[ \t]*(?:\n|\Z))*)',
                                re.M)
-        self.reHeader = re.compile('^\s*(#(?!define\s).*\s*)*')
-        self.reFooter = re.compile('\s*(#(?!define\s).*\s*)*$', re.M)
         Parser.__init__(self)
 
     def getEntity(self, ctx, offset):
         contents = ctx.contents
         m = self.reComment.match(contents, offset)
         if m:
             offset = m.end()
             self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
@@ -549,17 +529,16 @@ class IniParser(Parser):
         self.reSection = re.compile(
             '((?:[ \t]*\n)*)'
             '(\[(.*?)\])'
             '((?:[ \t]*(?:\n|\Z))*)', re.M)
         self.reKey = re.compile(
             '((?:[ \t]*\n)*)'
             '((.+?)=(.*))'
             '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reFooter = re.compile('\s*([;#].*\s*)*$')
         Parser.__init__(self)
 
     def getEntity(self, ctx, offset):
         contents = ctx.contents
         m = self.reComment.match(contents, offset)
         if m:
             offset = m.end()
             self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])