--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -225,24 +225,16 @@ class Parser:
if (not onlyEntities or
type(entity) is Entity or
type(entity) is Junk):
yield entity
entity, offset = self.getEntity(ctx, offset)
if len(contents) > offset:
yield Junk(ctx, (offset, len(contents)))
- def getHeader(self, contents, offset):
- header = ''
- h = self.reHeader.match(contents)
- if h:
- header = h.group()
- offset = h.end()
- return (header, offset)
-
def getEntity(self, ctx, offset):
m = self.reKey.match(ctx.contents, offset)
if m:
offset = m.end()
entity = self.createEntity(ctx, m)
return (entity, offset)
m = self.reComment.match(ctx.contents, offset)
if m:
@@ -309,32 +301,34 @@ class DTDParser(Parser):
# [#x0300-#x036F] | [#x203F-#x2040]
NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
Name = '[' + NameStartChar + '][' + NameChar + ']*'
reKey = re.compile('(?:(?P<pre>\s*)(?P<entity><!ENTITY\s+(?P<key>' + Name +
')\s+(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>)'
'(?P<post>\s*)?)',
re.DOTALL | re.M)
# add BOM to DTDs, details in bug 435002
- reHeader = re.compile(u'^\ufeff?')
+ reHeader = re.compile(u'^\ufeff')
reComment = re.compile('(\s*)(<!--(-?[%s])*?-->)(\s*)' % CharMinusDash,
re.S)
rePE = re.compile(u'(?:(\s*)'
u'(<!ENTITY\s+%\s+(' + Name +
u')\s+SYSTEM\s+(\"[^\"]*\"|\'[^\']*\')\s*>\s*%' + Name +
u';)([ \t]*(?:' + XmlComment + u'\s*)*\n?)?)')
def getEntity(self, ctx, offset):
'''
Overload Parser.getEntity to special-case ParsedEntities.
Just check for a parsed entity if that method claims junk.
<!ENTITY % foo SYSTEM "url">
%foo;
'''
+ if offset == 0 and self.reHeader.match(ctx.contents):
+ offset += 1  # skip the leading BOM (U+FEFF)
entity, inneroffset = Parser.getEntity(self, ctx, offset)
if (entity and isinstance(entity, Junk)) or entity is None:
m = self.rePE.match(ctx.contents, offset)
if m:
inneroffset = m.end()
self.last_comment = ''
entity = Entity(ctx, self.postProcessValue, '',
*[m.span(i) for i in xrange(6)])
@@ -355,33 +349,21 @@ class DTDParser(Parser):
class PropertiesParser(Parser):
escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
'(?P<nl>\n\s*)|(?P<single>.))', re.M)
known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
def __init__(self):
self.reKey = re.compile('^(\s*)'
'([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
- self.reHeader = re.compile('^\s*([#!].*\s*)+')
self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
self._escapedEnd = re.compile(r'\\+$')
self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
Parser.__init__(self)
- def getHeader(self, contents, offset):
- header = ''
- h = self.reHeader.match(contents, offset)
- if h:
- candidate = h.group()
- if 'http://mozilla.org/MPL/2.0/' in candidate or \
- 'LICENSE BLOCK' in candidate:
- header = candidate
- offset = h.end()
- return (header, offset)
-
def getEntity(self, ctx, offset):
# overwritten to parse values line by line
contents = ctx.contents
m = self.reComment.match(contents, offset)
if m:
spans = [m.span(i) for i in xrange(3)]
start_trailing = offset = m.end()
while offset < len(contents):
@@ -480,18 +462,16 @@ class DefinesParser(Parser):
self.reKey = re.compile('((?:[ \t]*\n)*)'
'(#define[ \t]+(\w+)[ \t]+(.*?)(?:\n|\Z))'
'((?:[ \t]*(?:\n|\Z))*)',
re.M)
self.rePI = re.compile('((?:[ \t]*\n)*)'
'(#(\w+)[ \t]+(.*?)(?:\n|\Z))'
'((?:[ \t]*(?:\n|\Z))*)',
re.M)
- self.reHeader = re.compile('^\s*(#(?!define\s).*\s*)*')
- self.reFooter = re.compile('\s*(#(?!define\s).*\s*)*$', re.M)
Parser.__init__(self)
def getEntity(self, ctx, offset):
contents = ctx.contents
m = self.reComment.match(contents, offset)
if m:
offset = m.end()
self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
@@ -549,17 +529,16 @@ class IniParser(Parser):
self.reSection = re.compile(
'((?:[ \t]*\n)*)'
'(\[(.*?)\])'
'((?:[ \t]*(?:\n|\Z))*)', re.M)
self.reKey = re.compile(
'((?:[ \t]*\n)*)'
'((.+?)=(.*))'
'((?:[ \t]*(?:\n|\Z))*)', re.M)
- self.reFooter = re.compile('\s*([;#].*\s*)*$')
Parser.__init__(self)
def getEntity(self, ctx, offset):
contents = ctx.contents
m = self.reComment.match(contents, offset)
if m:
offset = m.end()
self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])