Bug 1399059 - Part 8 - Use named regex groups. r?Pike draft
author: Staś Małolepszy <stas@mozilla.com>
Thu, 21 Sep 2017 18:46:39 +0200
changeset: 335 3fd23b5bf421316183d7500200dca2e3e7403eff
parent: 333 72f81f3794c1062407ec1c46b40df1d720bcec5c
push id: 106
push user: smalolepszy@mozilla.com
push date: Thu, 21 Sep 2017 17:03:20 +0000
reviewers: Pike
bugs: 1399059
Bug 1399059 - Part 8 - Use named regex groups. r?Pike
MozReview-Commit-ID: 92afpT4DJjM
compare_locales/parser.py
--- a/compare_locales/parser.py
+++ b/compare_locales/parser.py
@@ -283,18 +283,17 @@ class Parser(object):
                 return Whitespace(ctx, (offset, white_end))
             else:
                 return None
         return Junk(ctx, (offset, junkend))
 
     def createEntity(self, ctx, m):
         pre_comment = self.last_comment
         self.last_comment = None
-        return Entity(ctx, pre_comment,
-                      *[m.span(i) for i in xrange(3)])
+        return Entity(ctx, pre_comment, m.span(), m.span('key'), m.span('val'))
 
     @classmethod
     def findDuplicates(cls, entities):
         found = Counter(entity.key for entity in entities)
         for entity_id, cnt in found.items():
             if cnt > 1:
                 yield '{} occurs {} times'.format(entity_id, cnt)
 
@@ -352,20 +351,20 @@ class DTDParser(Parser):
     #     [#x0300-#x036F] | [#x203F-#x2040]
     NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
     Name = '[' + NameStartChar + '][' + NameChar + ']*'
     reKey = re.compile('<!ENTITY\s+(?P<key>' + Name + ')\s+'
                        '(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>',
                        re.DOTALL | re.M)
     # add BOM to DTDs, details in bug 435002
     reHeader = re.compile(u'^\ufeff')
-    reComment = re.compile('(<!--(-?[%s])*?-->)' % CharMinusDash,
+    reComment = re.compile('<!--(?P<val>-?[%s])*?-->' % CharMinusDash,
                            re.S)
-    rePE = re.compile(u'<!ENTITY\s+%\s+(' + Name + ')\s+'
-                      u'SYSTEM\s+(\"[^\"]*\"|\'[^\']*\')\s*>\s*'
+    rePE = re.compile(u'<!ENTITY\s+%\s+(?P<key>' + Name + ')\s+'
+                      u'SYSTEM\s+(?P<val>\"[^\"]*\"|\'[^\']*\')\s*>\s*'
                       u'%' + Name + ';'
                       u'(?:[ \t]*(?:' + XmlComment + u'\s*)*\n?)?')
 
     def getNext(self, ctx, offset):
         '''
         Overload Parser.getNext to special-case ParsedEntities.
         Just check for a parsed entity if that method claims junk.
 
@@ -374,17 +373,18 @@ class DTDParser(Parser):
         '''
         if offset is 0 and self.reHeader.match(ctx.contents):
             offset += 1
         entity = Parser.getNext(self, ctx, offset)
         if (entity and isinstance(entity, Junk)) or entity is None:
             m = self.rePE.match(ctx.contents, offset)
             if m:
                 self.last_comment = None
-                entity = DTDEntity(ctx, '', *[m.span(i) for i in xrange(3)])
+                entity = DTDEntity(
+                    ctx, '', m.span(), m.span('key'), m.span('val'))
         return entity
 
     def createEntity(self, ctx, m):
         valspan = m.span('val')
         valspan = (valspan[0]+1, valspan[1]-1)
         pre_comment = self.last_comment
         self.last_comment = None
         return DTDEntity(ctx, pre_comment,
@@ -406,18 +406,19 @@ class PropertiesEntity(Entity):
                 return ''
             return self.known_escapes.get(found['single'], found['single'])
 
         return self.escape.sub(unescape, self.raw_val)
 
 
 class PropertiesParser(Parser):
     def __init__(self):
-        self.reKey = re.compile('([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
-        self.reComment = re.compile('([#!][^\n]*\n)*([#!][^\n]*)', re.M)
+        self.reKey = re.compile(
+            '(?P<key>[^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
+        self.reComment = re.compile('(?:[#!][^\n]*\n)*(?:[#!][^\n]*)', re.M)
         self._escapedEnd = re.compile(r'\\+$')
         self._trailingWS = re.compile(r'\s*[\n\Z]', re.M)
         Parser.__init__(self)
 
     def getNext(self, ctx, offset):
         # overwritten to parse values line by line
         contents = ctx.contents
 
@@ -454,17 +455,17 @@ class PropertiesParser(Parser):
                 endval = ws.start()
                 offset = ws.end()
 
             pre_comment = self.last_comment
             self.last_comment = None
             entity = PropertiesEntity(
                 ctx, pre_comment,
                 (m.start(), endval),   # full span
-                m.span(1),   # key span
+                m.span('key'),
                 (m.end(), endval))   # value span
             return entity
         return self.getTrailing(ctx, offset, self.reKey, self.reComment)
 
 
 class DefinesInstruction(EntityBase):
     '''Entity-like object representing processing instructions in inc files
     '''
@@ -483,18 +484,18 @@ class DefinesParser(Parser):
     tail = re.compile(r'(?!)')  # never match
     reWhitespace = re.compile('\n+', re.M)
 
     EMPTY_LINES = 1 << 0
     PAST_FIRST_LINE = 1 << 1
 
     def __init__(self):
         self.reComment = re.compile('(?:^# .*?\n)*(?:^# [^\n]*)', re.M)
-        self.reKey = re.compile('#define[ \t]+(\w+)(?:[ \t]+([^\n]*))?', re.M)
-        self.rePI = re.compile('#(\w+[ \t]+[^\n]+)', re.M)
+        self.reKey = re.compile('#define[ \t]+(?P<key>\w+)(?:[ \t]+(?P<val>[^\n]*))?', re.M)
+        self.rePI = re.compile('#(?P<val>\w+[ \t]+[^\n]+)', re.M)
         Parser.__init__(self)
 
     def getNext(self, ctx, offset):
         contents = ctx.contents
 
         m = self.reWhitespace.match(contents, offset)
         if m:
             if ctx.state & self.EMPTY_LINES:
@@ -511,17 +512,17 @@ class DefinesParser(Parser):
         if m:
             self.last_comment = Comment(ctx, m.span())
             return self.last_comment
         m = self.reKey.match(contents, offset)
         if m:
             return self.createEntity(ctx, m)
         m = self.rePI.match(contents, offset)
         if m:
-            instr = DefinesInstruction(ctx, m.span(), m.span(1))
+            instr = DefinesInstruction(ctx, m.span(), m.span('val'))
             if instr.val == 'filter emptyLines':
                 ctx.state |= self.EMPTY_LINES
             if instr.val == 'unfilter emptyLines':
                 ctx.state &= ~ self.EMPTY_LINES
             return instr
         return self.getTrailing(ctx, offset,
                                 self.reComment, self.reKey, self.rePI)
 
@@ -544,33 +545,33 @@ class IniParser(Parser):
     # initial comment
     [cat]
     whitespace*
     #comment
     string=value
     ...
     '''
     def __init__(self):
-        self.reComment = re.compile('(^[;#][^\n]*\n)*(^[;#][^\n]*)', re.M)
-        self.reSection = re.compile('\[(.*?)\]', re.M)
-        self.reKey = re.compile('(.+?)=(.*)', re.M)
+        self.reComment = re.compile('(?:^[;#][^\n]*\n)*(?:^[;#][^\n]*)', re.M)
+        self.reSection = re.compile('\[(?P<val>.*?)\]', re.M)
+        self.reKey = re.compile('(?P<key>.+?)=(?P<val>.*)', re.M)
         Parser.__init__(self)
 
     def getNext(self, ctx, offset):
         contents = ctx.contents
         m = self.reWhitespace.match(contents, offset)
         if m:
             return Whitespace(ctx, m.span())
         m = self.reComment.match(contents, offset)
         if m:
             self.last_comment = Comment(ctx, m.span())
             return self.last_comment
         m = self.reSection.match(contents, offset)
         if m:
-            return IniSection(ctx, m.span(), m.span(1))
+            return IniSection(ctx, m.span(), m.span('val'))
         m = self.reKey.match(contents, offset)
         if m:
             return self.createEntity(ctx, m)
         return self.getTrailing(ctx, offset,
                                 self.reComment, self.reSection, self.reKey)
 
 
 class FluentAttribute(EntityBase):