--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -36,27 +36,26 @@ class EntityBase(object):
3: entity key (name)
4: entity value
5: post white space
<--[1]
<!ENTITY key "value">
<-------[2]--------->
'''
- def __init__(self, ctx, pp, pre_comment,
+ def __init__(self, ctx, pre_comment,
span, pre_ws_span, def_span,
key_span, val_span, post_span):
self.ctx = ctx
self.span = span
self.pre_ws_span = pre_ws_span
self.def_span = def_span
self.key_span = key_span
self.val_span = val_span
self.post_span = post_span
- self.pp = pp
self.pre_comment = pre_comment
pass
def position(self, offset=0):
"""Get the 1-based line and column of the character
with given offset into the Entity.
If offset is negative, return the end of the Entity.
@@ -88,32 +87,29 @@ class EntityBase(object):
return self.ctx.contents[self.pre_ws_span[0]:self.pre_ws_span[1]]
def get_def(self):
return self.ctx.contents[self.def_span[0]:self.def_span[1]]
def get_key(self):
return self.ctx.contents[self.key_span[0]:self.key_span[1]]
- def get_val(self):
- return self.pp(self.ctx.contents[self.val_span[0]:self.val_span[1]])
-
def get_raw_val(self):
return self.ctx.contents[self.val_span[0]:self.val_span[1]]
def get_post(self):
return self.ctx.contents[self.post_span[0]:self.post_span[1]]
# getters
all = property(get_all)
pre_ws = property(get_pre_ws)
definition = property(get_def)
key = property(get_key)
- val = property(get_val)
+ val = property(get_raw_val)
raw_val = property(get_raw_val)
post = property(get_post)
def __repr__(self):
return self.key
re_br = re.compile('<br\s*/?>', re.U)
re_sgml = re.compile('</?\w+.*?>', re.U | re.M)
@@ -121,45 +117,32 @@ class EntityBase(object):
def count_words(self):
"""Count the words in an English string.
Replace a couple of xml markup to make that safer, too.
"""
value = self.re_br.sub(u'\n', self.val)
value = self.re_sgml.sub(u'', value)
return len(value.split())
- def __eq__(self, other):
+ def equals(self, other):
return self.key == other.key and self.val == other.val
class Entity(EntityBase):
- def value_position(self, offset=0):
- # DTDChecker already returns tuples of (line, col) positions
- if isinstance(offset, tuple):
- line_pos, col_pos = offset
- line, col = super(Entity, self).value_position()
- if line_pos == 1:
- col = col + col_pos
- else:
- col = col_pos
- line += line_pos - 1
- return line, col
- else:
- return super(Entity, self).value_position(offset)
+ pass
class Comment(EntityBase):
def __init__(self, ctx, span, pre_ws_span, def_span,
post_span):
self.ctx = ctx
self.span = span
self.pre_ws_span = pre_ws_span
self.def_span = def_span
self.post_span = post_span
- self.pp = lambda v: v
@property
def key(self):
return None
@property
def val(self):
return None
@@ -211,17 +194,16 @@ class Whitespace(EntityBase):
'''Entity-like object representing an empty file with whitespace,
if allowed
'''
def __init__(self, ctx, span):
self.ctx = ctx
self.key_span = self.val_span = self.span = span
self.def_span = self.pre_ws_span = (span[0], span[0])
self.post_span = (span[1], span[1])
- self.pp = lambda v: v
def __repr__(self):
return self.raw_val
class Parser(object):
capabilities = CAN_SKIP | CAN_MERGE
tail = re.compile('\s+\Z')
@@ -270,33 +252,30 @@ class Parser(object):
def parse(self):
l = []
m = {}
for e in self:
m[e.key] = len(l)
l.append(e)
return (l, m)
- def postProcessValue(self, val):
- return val
-
def __iter__(self):
return self.walk(onlyEntities=True)
def walk(self, onlyEntities=False):
if not self.ctx:
# loading file failed, or we just didn't load anything
return
ctx = self.ctx
contents = ctx.contents
offset = 0
entity, offset = self.getEntity(ctx, offset)
while entity:
if (not onlyEntities or
- type(entity) is Entity or
+ isinstance(entity, Entity) or
type(entity) is Junk):
yield entity
entity, offset = self.getEntity(ctx, offset)
if len(contents) > offset:
yield Junk(ctx, (offset, len(contents)))
def getEntity(self, ctx, offset):
m = self.reKey.match(ctx.contents, offset)
@@ -323,17 +302,17 @@ class Parser(object):
return (Whitespace(ctx, (offset, white_end)), white_end)
else:
return (None, offset)
return (Junk(ctx, (offset, junkend)), junkend)
def createEntity(self, ctx, m):
pre_comment = unicode(self.last_comment) if self.last_comment else ''
self.last_comment = ''
- return Entity(ctx, self.postProcessValue, pre_comment,
+ return Entity(ctx, pre_comment,
*[m.span(i) for i in xrange(6)])
def getParser(path):
for item in __constructors:
if re.search(item[0], path):
return item[1]
raise UserWarning("Cannot find Parser")
@@ -348,16 +327,32 @@ def getParser(path):
# 6: post comment (and white space) in the same line (dtd only)
# <--[1]
# <!-- pre comments --> <--[2]
# <!ENTITY key "value"> <!-- comment -->
#
# <-------[3]---------><------[6]------>
+class DTDEntity(Entity):
+ def value_position(self, offset=0):
+ # DTDChecker already returns tuples of (line, col) positions
+ if isinstance(offset, tuple):
+ line_pos, col_pos = offset
+ line, col = super(DTDEntity, self).value_position()
+ if line_pos == 1:
+ col = col + col_pos
+ else:
+ col = col_pos
+ line += line_pos - 1
+ return line, col
+ else:
+ return super(DTDEntity, self).value_position(offset)
+
+
class DTDParser(Parser):
# http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
# ":" | [A-Z] | "_" | [a-z] |
# [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
# | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
# [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
# [#x10000-#xEFFFF]
CharMinusDash = u'\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
@@ -395,37 +390,50 @@ class DTDParser(Parser):
if offset is 0 and self.reHeader.match(ctx.contents):
offset += 1
entity, inneroffset = Parser.getEntity(self, ctx, offset)
if (entity and isinstance(entity, Junk)) or entity is None:
m = self.rePE.match(ctx.contents, offset)
if m:
inneroffset = m.end()
self.last_comment = ''
- entity = Entity(ctx, self.postProcessValue, '',
- *[m.span(i) for i in xrange(6)])
+ entity = DTDEntity(ctx, '', *[m.span(i) for i in xrange(6)])
return (entity, inneroffset)
def createEntity(self, ctx, m):
valspan = m.span('val')
valspan = (valspan[0]+1, valspan[1]-1)
pre_comment = unicode(self.last_comment) if self.last_comment else ''
self.last_comment = ''
- return Entity(ctx, self.postProcessValue, pre_comment,
- m.span(),
- m.span('pre'),
- m.span('entity'), m.span('key'), valspan,
- m.span('post'))
+ return DTDEntity(ctx, pre_comment,
+ m.span(),
+ m.span('pre'),
+ m.span('entity'), m.span('key'), valspan,
+ m.span('post'))
-class PropertiesParser(Parser):
+class PropertiesEntity(Entity):
escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
'(?P<nl>\n\s*)|(?P<single>.))', re.M)
known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
+ @property
+ def val(self):
+ def unescape(m):
+ found = m.groupdict()
+ if found['uni']:
+ return unichr(int(found['uni'][1:], 16))
+ if found['nl']:
+ return ''
+ return self.known_escapes.get(found['single'], found['single'])
+
+ return self.escape.sub(unescape, self.raw_val)
+
+
+class PropertiesParser(Parser):
def __init__(self):
self.reKey = re.compile('^(\s*)'
'([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
self._escapedEnd = re.compile(r'\\+$')
self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
Parser.__init__(self)
@@ -464,50 +472,38 @@ class PropertiesParser(Parser):
# strip trailing whitespace
ws = self._trailingWS.search(contents, startline)
if ws:
endval = ws.start()
offset = ws.end()
pre_comment = (unicode(self.last_comment) if self.last_comment
else '')
self.last_comment = ''
- entity = Entity(ctx, self.postProcessValue, pre_comment,
- (m.start(), offset), # full span
- m.span(1), # leading whitespan
- (m.start(2), offset), # entity def span
- m.span(2), # key span
- (m.end(), endval), # value span
- (offset, offset)) # post comment span, empty
+ entity = PropertiesEntity(
+ ctx, pre_comment,
+ (m.start(), offset), # full span
+ m.span(1), # leading whitespan
+ (m.start(2), offset), # entity def span
+ m.span(2), # key span
+ (m.end(), endval), # value span
+ (offset, offset)) # post comment span, empty
return (entity, offset)
return self.getTrailing(ctx, offset, self.reKey, self.reComment)
- def postProcessValue(self, val):
-
- def unescape(m):
- found = m.groupdict()
- if found['uni']:
- return unichr(int(found['uni'][1:], 16))
- if found['nl']:
- return ''
- return self.known_escapes.get(found['single'], found['single'])
- val = self.escape.sub(unescape, val)
- return val
-
class DefinesInstruction(EntityBase):
'''Entity-like object representing processing instructions in inc files
'''
def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
self.ctx = ctx
self.span = span
self.pre_ws_span = pre_ws_span
self.def_span = def_span
self.key_span = self.val_span = val_span
self.post_span = post_span
- self.pp = lambda v: v
def __repr__(self):
return self.raw_val
class DefinesParser(Parser):
# can't merge, #unfilter needs to be the last item, which we don't support
capabilities = CAN_COPY
@@ -553,17 +549,16 @@ class IniSection(EntityBase):
'''
def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
self.ctx = ctx
self.span = span
self.pre_ws_span = pre_ws_span
self.def_span = def_span
self.key_span = self.val_span = val_span
self.post_span = post_span
- self.pp = lambda v: v
def __repr__(self):
return self.raw_val
class IniParser(Parser):
'''
Parse files of the form:
@@ -623,36 +618,32 @@ class FluentEntity(Entity):
if entry.value is not None:
self.val_span = (entry.value.span.start, entry.value.span.end)
else:
self.val_span = (0, 0)
self.entry = entry
- def pp(self, value):
- # XXX Normalize whitespace?
- return value
-
_word_count = None
def count_words(self):
if self._word_count is None:
self._word_count = 0
def count_words(node):
if isinstance(node, ftl.TextElement):
self._word_count += len(node.value.split())
return node
self.entry.traverse(count_words)
return self._word_count
- def __eq__(self, other):
+ def equals(self, other):
return self.entry.equals(
other.entry, ignored_fields=self.ignored_fields)
# Positions yielded by FluentChecker.check are absolute offsets from the
# beginning of the file. This is different from the base Checker behavior
# which yields offsets from the beginning of the current entity's value.
def position(self, pos=None):
if pos is None:
--- a/compare_locales/tests/test_checks.py
+++ b/compare_locales/tests/test_checks.py
@@ -1,17 +1,17 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import unittest
from compare_locales.checks import getChecker
-from compare_locales.parser import getParser, Parser, Entity
+from compare_locales.parser import getParser, Parser, DTDEntity
from compare_locales.paths import File
class BaseHelper(unittest.TestCase):
file = None
refContent = None
def setUp(self):
@@ -235,26 +235,27 @@ class TestAndroid(unittest.TestCase):
"""
apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \
u"\\u0027, or use \u2019, or put string in quotes."
quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \
u"\\u0022, or put string in apostrophes."
def getEntity(self, v):
ctx = Parser.Context(v)
- return Entity(ctx, lambda s: s, '', (0, len(v)), (), (), (),
- (0, len(v)), ())
+ return DTDEntity(
+ ctx, '', (0, len(v)), (), (), (), (0, len(v)), ())
def getDTDEntity(self, v):
v = v.replace('"', '"')
ctx = Parser.Context('<!ENTITY foo "%s">' % v)
- return Entity(ctx,
- lambda s: s, '',
- (0, len(v) + 16), (), (), (9, 12),
- (14, len(v) + 14), ())
+ return DTDEntity(
+ ctx,
+ '',
+ (0, len(v) + 16), (), (), (9, 12),
+ (14, len(v) + 14), ())
def test_android_dtd(self):
"""Testing the actual android checks. The logic is involved,
so this is a lot of nitty gritty detail tests.
"""
f = File("embedding/android/strings.dtd", "strings.dtd",
"embedding/android")
checker = getChecker(f, extra_tests=['android-dtd'])