|
|
# NOT_RPYTHON
|
|
|
"""
|
|
|
A pure Python reimplementation of the _sre module from CPython 2.4
|
|
|
Copyright 2005 Nik Haldimann, licensed under the MIT license
|
|
|
|
|
|
This code is based on material licensed under CNRI's Python 1.6 license and
|
|
|
copyrighted by: Copyright (c) 1997-2001 by Secret Labs AB
|
|
|
"""
|
|
|
|
|
|
# NOTE(review): 2**31. CPython's sre uses 65535 (CODESIZE 2) or 2**32 - 1
# (CODESIZE 4) for MAXREPEAT -- confirm this value against the paired
# sre_compile used by this runtime.
MAXREPEAT = 2147483648

#import array

import operator, sys

from sre_constants import ATCODES, OPCODES, CHCODES

from sre_constants import SRE_INFO_PREFIX, SRE_INFO_LITERAL

from sre_constants import SRE_FLAG_UNICODE, SRE_FLAG_LOCALE

import sys  # NOTE(review): duplicate of the `import sys` above; harmless.

# Identifying as _sre from Python 2.3 or 2.4

#if sys.version_info[:2] >= (2, 4):

MAGIC = 20031017

#else:

# MAGIC = 20030419

# In _sre.c this is bytesize of the code word type of the C implementation.

# There it's 2 for normal Python builds and more for wide unicode builds (large

# enough to hold a 32-bit UCS-4 encoded character). Since here in pure Python

# we only see re bytecodes as Python longs, we shouldn't have to care about the

# codesize. But sre_compile will compile some stuff differently depending on the

# codesize (e.g., charsets).

# starting with python 3.3 CODESIZE is 4

#if sys.maxunicode == 65535:

# CODESIZE = 2

#else:

CODESIZE = 4

copyright = "_sre.py 2.4c Copyright 2005 by Nik Haldimann"
|
|
|
|
|
|
|
|
|
def getcodesize():
    """Return CODESIZE, the byte size of one re bytecode word."""
    return CODESIZE
|
|
|
|
|
|
def compile(pattern, flags, code, groups=0, groupindex=None, indexgroup=None):
    """Compiles (or rather just converts) a pattern descriptor to a SRE_Pattern
    object. Actual compilation to opcodes happens in sre_compile.

    Bug fix: groupindex/indexgroup previously defaulted to a shared mutable
    ``{}`` / ``[None]``; they now default to None and fresh objects are
    created per call. Explicit callers are unaffected.
    """
    if groupindex is None:
        groupindex = {}
    if indexgroup is None:
        indexgroup = [None]
    return SRE_Pattern(pattern, flags, code, groups, groupindex, indexgroup)
|
|
|
|
|
|
def getlower(char_ord, flags):
    """Return the lowercased code point of char_ord under the given flags.

    Lowercasing applies to ASCII always, to any code point when the UNICODE
    flag is set, and to Latin-1 (< 256) when the LOCALE flag is set;
    otherwise char_ord is returned unchanged.
    """
    is_ascii = char_ord < 128
    wants_unicode = bool(flags & SRE_FLAG_UNICODE)
    wants_locale = bool(flags & SRE_FLAG_LOCALE) and char_ord < 256
    if is_ascii or wants_unicode or wants_locale:
        #return ord(unichr(char_ord).lower())
        return ord(chr(char_ord).lower())
    return char_ord
|
|
|
|
|
|
|
|
|
class SRE_Pattern:
    """Compiled pattern object, mirroring the interface of CPython's _sre.

    Fixes relative to the original:
    - __init__ no longer uses shared mutable default arguments;
    - finditer no longer creates (and discards) an extra scanner;
    - __deepcopy__ accepts the memo argument that copy.deepcopy passes.
    """

    def __init__(self, pattern, flags, code, groups=0, groupindex=None, indexgroup=None):
        self.pattern = pattern
        self.flags = flags
        self.groups = groups
        # Per-instance defaults (bug fix: originally one shared dict/list).
        self.groupindex = {} if groupindex is None else groupindex # Maps group names to group indices
        self._indexgroup = [None] if indexgroup is None else indexgroup # Maps indices to group names
        self._code = code

    def match(self, string, pos=0, endpos=sys.maxsize):
        """If zero or more characters at the beginning of string match this
        regular expression, return a corresponding MatchObject instance. Return
        None if the string does not match the pattern."""
        state = _State(string, pos, endpos, self.flags)
        if state.match(self._code):
            return SRE_Match(self, state)
        return None

    def search(self, string, pos=0, endpos=sys.maxsize):
        """Scan through string looking for a location where this regular
        expression produces a match, and return a corresponding MatchObject
        instance. Return None if no position in the string matches the
        pattern."""
        state = _State(string, pos, endpos, self.flags)
        if state.search(self._code):
            return SRE_Match(self, state)
        else:
            return None

    def findall(self, string, pos=0, endpos=sys.maxsize):
        """Return a list of all non-overlapping matches of pattern in string."""
        matchlist = []
        state = _State(string, pos, endpos, self.flags)
        while state.start <= state.end:
            state.reset()
            state.string_position = state.start
            if not state.search(self._code):
                break
            match = SRE_Match(self, state)
            # 0 or 1 group: plain string item; several groups: tuple item.
            if self.groups == 0 or self.groups == 1:
                item = match.group(self.groups)
            else:
                item = match.groups("")
            matchlist.append(item)
            # Always advance, even over a zero-width match.
            if state.string_position == state.start:
                state.start += 1
            else:
                state.start = state.string_position
        return matchlist

    def _subx(self, template, string, count=0, subn=False):
        """Shared implementation of sub()/subn(). Returns the substituted
        string, plus the substitution count when subn is true."""
        filter = template
        if not callable(template) and "\\" in template:
            # handle non-literal strings ; hand it over to the template compiler
            # (sre was renamed to re; keep the historical local alias)
            import re as sre
            filter = sre._subx(self, template)
        state = _State(string, 0, sys.maxsize, self.flags)
        sublist = []

        n = last_pos = 0
        while not count or n < count:
            state.reset()
            state.string_position = state.start
            if not state.search(self._code):
                break
            if last_pos < state.start:
                # Copy the unmatched stretch verbatim.
                sublist.append(string[last_pos:state.start])
            if not (last_pos == state.start and
                    last_pos == state.string_position and n > 0):
                # the above ignores empty matches on latest position
                if callable(filter):
                    sublist.append(filter(SRE_Match(self, state)))
                else:
                    sublist.append(filter)
                last_pos = state.string_position
                n += 1
            # Always advance, even over a zero-width match.
            if state.string_position == state.start:
                state.start += 1
            else:
                state.start = state.string_position

        if last_pos < state.end:
            sublist.append(string[last_pos:state.end])
        item = "".join(sublist)
        if subn:
            return item, n
        else:
            return item

    def sub(self, repl, string, count=0):
        """Return the string obtained by replacing the leftmost non-overlapping
        occurrences of pattern in string by the replacement repl."""
        return self._subx(repl, string, count, False)

    def subn(self, repl, string, count=0):
        """Return the tuple (new_string, number_of_subs_made) found by replacing
        the leftmost non-overlapping occurrences of pattern with the replacement
        repl."""
        return self._subx(repl, string, count, True)

    def split(self, string, maxsplit=0):
        """Split string by the occurrences of pattern."""
        splitlist = []
        state = _State(string, 0, sys.maxsize, self.flags)
        n = 0
        last = state.start
        while not maxsplit or n < maxsplit:
            state.reset()
            state.string_position = state.start
            if not state.search(self._code):
                break
            if state.start == state.string_position: # zero-width match
                if last == state.end: # or end of string
                    break
                state.start += 1
                continue
            splitlist.append(string[last:state.start])
            # add groups (if any)
            if self.groups:
                match = SRE_Match(self, state)
                splitlist.extend(list(match.groups(None)))
            n += 1
            last = state.start = state.string_position
        splitlist.append(string[last:state.end])
        return splitlist

    def finditer(self, string, pos=0, endpos=sys.maxsize):
        """Return a list of all non-overlapping match objects for pattern in
        string. (CPython returns an iterator; this implementation returns the
        materialized list, which iterates the same.)"""
        # Bug fix: the original also created a second, immediately discarded
        # scanner via self.scanner(...) before this one.
        matches = []
        scan = SRE_Scanner(self, string, pos, endpos)
        m = scan.search()
        while m:
            matches.append(m)
            m = scan.search()
        return matches

    def scanner(self, string, start=0, end=sys.maxsize):
        """Return the undocumented scanner object over string[start:end]."""
        return SRE_Scanner(self, string, start, end)

    def __copy__(self):
        raise TypeError("cannot copy this pattern object")

    def __deepcopy__(self, memo=None):
        # Bug fix: copy.deepcopy passes a memo dict; the original signature
        # rejected it with an unrelated TypeError.
        raise TypeError("cannot copy this pattern object")
|
|
|
|
|
|
class SRE_Scanner:
    """Undocumented scanner interface of sre: repeatedly match/search over a
    string, advancing past each hit."""

    def __init__(self, pattern, string, start, end):
        self.pattern = pattern
        self._state = _State(string, start, end, pattern.flags)

    def _match_search(self, matcher):
        """Run one attempt with `matcher` (state.match or state.search),
        advance the scan position, and return the match or None."""
        state = self._state
        state.reset()
        state.string_position = state.start
        found = None
        if matcher(self.pattern._code):
            found = SRE_Match(self.pattern, state)
        # Step past the hit; bump by one on failure or a zero-width match so
        # the scan always makes progress.
        if found is None or state.string_position == state.start:
            state.start += 1
        else:
            state.start = state.string_position
        return found

    def match(self):
        return self._match_search(self._state.match)

    def search(self):
        return self._match_search(self._state.search)
|
|
|
|
|
|
class SRE_Match:
    """Match result object, mirroring CPython's _sre match objects.

    Fixes relative to the original:
    - expand() imports `re` (Python 3) instead of the removed `sre` module,
      matching SRE_Pattern._subx;
    - __copy__/__deepcopy__ gained the missing `self` (and `memo`) parameters
      and now raise the intended TypeError message.
    """

    def __init__(self, pattern, state):
        self.re = pattern
        self.string = state.string
        self.pos = state.pos
        self.endpos = state.end
        self.lastindex = state.lastindex
        if self.lastindex < 0:
            self.lastindex = None
        self.regs = self._create_regs(state)

        # `0 <= None` raises TypeError on Python 3, hence the explicit
        # None check before the range test.
        if self.lastindex is not None and pattern._indexgroup \
                and 0 <= self.lastindex < len(pattern._indexgroup):
            # The upper-bound check should not be necessary, as the re
            # compiler is supposed to always provide an _indexgroup list long
            # enough. But the re.Scanner class seems to screw up something
            # there, test_scanner in test_re won't work without upper-bound
            # checking. XXX investigate this and report bug to CPython.
            self.lastgroup = pattern._indexgroup[self.lastindex]
        else:
            self.lastgroup = None

    def _create_regs(self, state):
        """Creates a tuple of index pairs representing matched groups."""
        regs = [(state.start, state.string_position)]
        for group in range(self.re.groups):
            mark_index = 2 * group
            if mark_index + 1 < len(state.marks) \
                    and state.marks[mark_index] is not None \
                    and state.marks[mark_index + 1] is not None:
                regs.append((state.marks[mark_index], state.marks[mark_index + 1]))
            else:
                # Group did not participate in the match.
                regs.append((-1, -1))
        return tuple(regs)

    def _get_index(self, group):
        """Map a group number or name to a group index; raise IndexError for
        unknown groups."""
        if isinstance(group, int):
            if group >= 0 and group <= self.re.groups:
                return group
        else:
            if group in self.re.groupindex:
                return self.re.groupindex[group]
        raise IndexError("no such group")

    def _get_slice(self, group, default):
        """Return the substring for a group index, or default if the group
        did not participate in the match."""
        group_indices = self.regs[group]
        if group_indices[0] >= 0:
            return self.string[group_indices[0]:group_indices[1]]
        else:
            return default

    def start(self, group=0):
        """Returns the indices of the start of the substring matched by group;
        group defaults to zero (meaning the whole matched substring). Returns -1
        if group exists but did not contribute to the match."""
        return self.regs[self._get_index(group)][0]

    def end(self, group=0):
        """Returns the indices of the end of the substring matched by group;
        group defaults to zero (meaning the whole matched substring). Returns -1
        if group exists but did not contribute to the match."""
        return self.regs[self._get_index(group)][1]

    def span(self, group=0):
        """Returns the 2-tuple (m.start(group), m.end(group))."""
        return self.start(group), self.end(group)

    def expand(self, template):
        """Return the string obtained by doing backslash substitution and
        resolving group references on template."""
        # Bug fix: `import sre` does not exist on Python 3; use the same
        # aliased import as SRE_Pattern._subx.
        import re as sre
        return sre._expand(self.re, self, template)

    def groups(self, default=None):
        """Returns a tuple containing all the subgroups of the match. The
        default argument is used for groups that did not participate in the
        match (defaults to None)."""
        groups = []
        for indices in self.regs[1:]:
            if indices[0] >= 0:
                groups.append(self.string[indices[0]:indices[1]])
            else:
                groups.append(default)
        return tuple(groups)

    def groupdict(self, default=None):
        """Return a dictionary containing all the named subgroups of the match.
        The default argument is used for groups that did not participate in the
        match (defaults to None)."""
        groupdict = {}
        for key, value in self.re.groupindex.items():
            groupdict[key] = self._get_slice(value, default)
        return groupdict

    def group(self, *args):
        """Returns one or more subgroups of the match. Each argument is either a
        group index or a group name."""
        if len(args) == 0:
            args = (0,)
        grouplist = []
        for group in args:
            grouplist.append(self._get_slice(self._get_index(group), None))
        if len(grouplist) == 1:
            return grouplist[0]
        else:
            return tuple(grouplist)

    def __copy__(self):
        # Bug fix: the original omitted `self` (and said "pattern object").
        raise TypeError("cannot copy this match object")

    def __deepcopy__(self, memo=None):
        # Bug fix: the original omitted `self` and the memo argument.
        raise TypeError("cannot copy this match object")
|
|
|
|
|
|
|
|
|
class _State:
    """Mutable matching state for one match/search run: the subject string,
    the current window [start:end], group marks, and the explicit stack of
    _MatchContext objects the opcode interpreter runs on."""

    def __init__(self, string, start, end, flags):
        self.string = string
        # Clamp the requested window to the actual string bounds.
        if start < 0:
            start = 0
        if end > len(string):
            end = len(string)
        self.start = start
        self.string_position = self.start
        self.end = end
        self.pos = start
        self.flags = flags
        self.reset()

    def reset(self):
        """Clear per-attempt state (marks, repeat chain, context stack)."""
        self.marks = []
        self.lastindex = -1
        self.marks_stack = []
        self.context_stack = []
        self.repeat = None

    def match(self, pattern_codes):
        """Interpret pattern_codes at the current position; return True/False.

        Contexts are run off an explicit stack so that generator-based opcode
        handlers can suspend while a child context is interpreted.
        """
        # Optimization: Check string length. pattern_codes[3] contains the
        # minimum length for a string to possibly match.
        # brython.. the optimization doesn't work
        #if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
        #    if self.end - self.string_position < pattern_codes[3]:
        #        #_log("reject (got %d chars, need %d)"
        #        #    % (self.end - self.string_position, pattern_codes[3]))
        #        return False

        dispatcher = _OpcodeDispatcher()
        self.context_stack.append(_MatchContext(self, pattern_codes))
        has_matched = None
        while len(self.context_stack) > 0:
            context = self.context_stack[-1]
            has_matched = dispatcher.match(context)
            if has_matched is not None: # don't pop if context isn't done
                self.context_stack.pop()
        return has_matched

    def search(self, pattern_codes):
        """Try match() at successive positions; return True on first hit."""
        flags = 0
        if pattern_codes[0] == OPCODES["info"]:
            # optimization info block
            # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
            if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
                return self.fast_search(pattern_codes)
            flags = pattern_codes[2]
            # Strip the info block before interpreting.
            pattern_codes = pattern_codes[pattern_codes[1] + 1:]

        string_position = self.start
        if pattern_codes[0] == OPCODES["literal"]:
            # Special case: Pattern starts with a literal character. This is
            # used for short prefixes
            character = pattern_codes[1]
            while True:
                # Skip ahead to the next occurrence of the literal.
                while string_position < self.end \
                        and ord(self.string[string_position]) != character:
                    string_position += 1
                if string_position >= self.end:
                    return False
                self.start = string_position
                string_position += 1
                self.string_position = string_position
                if flags & SRE_INFO_LITERAL:
                    # Pure literal pattern: the prefix hit is the whole match.
                    return True
                if self.match(pattern_codes[2:]):
                    return True
            return False

        # General case
        while string_position <= self.end:
            self.reset()
            self.start = self.string_position = string_position
            if self.match(pattern_codes):
                return True
            string_position += 1
        return False

    def fast_search(self, pattern_codes):
        """Skips forward in a string as fast as possible using information from
        an optimization info block."""
        # pattern starts with a known prefix
        # <5=length> <6=skip> <7=prefix data> <overlap data>
        flags = pattern_codes[2]
        prefix_len = pattern_codes[5]
        prefix_skip = pattern_codes[6] # don't really know what this is good for
        prefix = pattern_codes[7:7 + prefix_len]
        # Knuth-Morris-Pratt style overlap table for the prefix.
        overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
        pattern_codes = pattern_codes[pattern_codes[1] + 1:]
        i = 0
        string_position = self.string_position
        while string_position < self.end:
            while True:
                if ord(self.string[string_position]) != prefix[i]:
                    if i == 0:
                        break
                    else:
                        i = overlap[i]
                else:
                    i += 1
                    if i == prefix_len:
                        # found a potential match
                        self.start = string_position + 1 - prefix_len
                        self.string_position = string_position + 1 \
                                - prefix_len + prefix_skip
                        if flags & SRE_INFO_LITERAL:
                            return True # matched all of pure literal pattern
                        if self.match(pattern_codes[2 * prefix_skip:]):
                            return True
                        i = overlap[i]
                    break
            string_position += 1
        return False

    def set_mark(self, mark_nr, position):
        """Record a group boundary; odd mark numbers close a group."""
        if mark_nr & 1:
            # This id marks the end of a group.
            # fix python 3 division incompatability
            #self.lastindex = mark_nr / 2 + 1
            self.lastindex = mark_nr // 2 + 1
        if mark_nr >= len(self.marks):
            # Grow the marks list on demand, padding with None.
            self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
        self.marks[mark_nr] = position

    def get_marks(self, group_index):
        """Return the (start, end) marks of a group, or (None, None)."""
        marks_index = 2 * group_index
        if len(self.marks) > marks_index + 1:
            return self.marks[marks_index], self.marks[marks_index + 1]
        else:
            return None, None

    def marks_push(self):
        # Snapshot marks + lastindex for backtracking.
        self.marks_stack.append((self.marks[:], self.lastindex))

    def marks_pop(self):
        # Restore and discard the latest snapshot.
        self.marks, self.lastindex = self.marks_stack.pop()

    def marks_pop_keep(self):
        # Restore the latest snapshot but keep it on the stack.
        self.marks, self.lastindex = self.marks_stack[-1]

    def marks_pop_discard(self):
        # Drop the latest snapshot without restoring it.
        self.marks_stack.pop()

    def lower(self, char_ord):
        """Lowercase char_ord according to this state's flags."""
        return getlower(char_ord, self.flags)
|
|
|
|
|
|
|
|
|
class _MatchContext:
|
|
|
|
|
|
def __init__(self, state, pattern_codes):
|
|
|
self.state = state
|
|
|
self.pattern_codes = pattern_codes
|
|
|
self.string_position = state.string_position
|
|
|
self.code_position = 0
|
|
|
self.has_matched = None
|
|
|
|
|
|
def push_new_context(self, pattern_offset):
|
|
|
"""Creates a new child context of this context and pushes it on the
|
|
|
stack. pattern_offset is the offset off the current code position to
|
|
|
start interpreting from."""
|
|
|
child_context = _MatchContext(self.state,
|
|
|
self.pattern_codes[self.code_position + pattern_offset:])
|
|
|
#print("_sre.py:517:pushing new context") #, child_context.has_matched)
|
|
|
#print(self.state.string_position)
|
|
|
#print(self.pattern_codes[self.code_position + pattern_offset:])
|
|
|
#print(pattern_offset)
|
|
|
self.state.context_stack.append(child_context)
|
|
|
return child_context
|
|
|
|
|
|
def peek_char(self, peek=0):
|
|
|
return self.state.string[self.string_position + peek]
|
|
|
|
|
|
def skip_char(self, skip_count):
|
|
|
self.string_position += skip_count
|
|
|
|
|
|
def remaining_chars(self):
|
|
|
return self.state.end - self.string_position
|
|
|
|
|
|
def peek_code(self, peek=0):
|
|
|
return self.pattern_codes[self.code_position + peek]
|
|
|
|
|
|
def skip_code(self, skip_count):
|
|
|
self.code_position += skip_count
|
|
|
|
|
|
def remaining_codes(self):
|
|
|
return len(self.pattern_codes) - self.code_position
|
|
|
|
|
|
def at_beginning(self):
|
|
|
return self.string_position == 0
|
|
|
|
|
|
def at_end(self):
|
|
|
return self.string_position == self.state.end
|
|
|
|
|
|
def at_linebreak(self):
|
|
|
return not self.at_end() and _is_linebreak(self.peek_char())
|
|
|
|
|
|
def at_boundary(self, word_checker):
|
|
|
if self.at_beginning() and self.at_end():
|
|
|
return False
|
|
|
that = not self.at_beginning() and word_checker(self.peek_char(-1))
|
|
|
this = not self.at_end() and word_checker(self.peek_char())
|
|
|
return this != that
|
|
|
|
|
|
|
|
|
class _RepeatContext(_MatchContext):
    """Context for a <REPEAT> ... <UNTIL> construct. Links to the enclosing
    repeat so MAX_UNTIL/MIN_UNTIL can walk and restore the chain."""

    def __init__(self, context):
        remaining = context.pattern_codes[context.code_position:]
        _MatchContext.__init__(self, context.state, remaining)
        self.count = -1                        # repetitions matched so far (-1: none yet)
        self.previous = context.state.repeat   # enclosing repeat, restored on exit
        self.last_position = None              # guards against zero-width repeat loops
|
|
|
|
|
|
|
|
|
class _Dispatcher:
|
|
|
|
|
|
DISPATCH_TABLE = None
|
|
|
|
|
|
def dispatch(self, code, context):
|
|
|
method = self.DISPATCH_TABLE.get(code, self.__class__.unknown)
|
|
|
return method(self, context)
|
|
|
|
|
|
def unknown(self, code, ctx):
|
|
|
raise NotImplementedError()
|
|
|
|
|
|
def build_dispatch_table(cls, code_dict, method_prefix):
|
|
|
if cls.DISPATCH_TABLE is not None:
|
|
|
return
|
|
|
table = {}
|
|
|
for key, value in code_dict.items():
|
|
|
if hasattr(cls, "%s%s" % (method_prefix, key)):
|
|
|
table[value] = getattr(cls, "%s%s" % (method_prefix, key))
|
|
|
cls.DISPATCH_TABLE = table
|
|
|
|
|
|
build_dispatch_table = classmethod(build_dispatch_table)
|
|
|
|
|
|
|
|
|
class _OpcodeDispatcher(_Dispatcher):
|
|
|
|
|
|
    def __init__(self):
        # Generators suspended mid-opcode, keyed by id(context); see dispatch().
        self.executing_contexts = {}
        # Sub-dispatchers for <AT>, <CATEGORY> and charset membership opcodes.
        self.at_dispatcher = _AtcodeDispatcher()
        self.ch_dispatcher = _ChcodeDispatcher()
        self.set_dispatcher = _CharsetDispatcher()
|
|
|
|
|
|
    def match(self, context):
        """Returns True if the current context matches, False if it doesn't and
        None if matching is not finished, ie must be resumed after child
        contexts have been matched."""
        while context.remaining_codes() > 0 and context.has_matched is None:
            opcode = context.peek_code()
            if not self.dispatch(opcode, context):
                # Handler suspended: a child context must be run first.
                return None
        if context.has_matched is None:
            # Ran out of codes without an explicit verdict: no match.
            context.has_matched = False
        return context.has_matched
|
|
|
|
|
|
    def dispatch(self, opcode, context):
        """Dispatches a context on a given opcode. Returns True if the context
        is done matching, False if it must be resumed when next encountered."""
        #if self.executing_contexts.has_key(id(context)):
        if id(context) in self.executing_contexts:
            # This context suspended earlier inside a generator-based
            # handler; resume that generator instead of restarting the opcode.
            generator = self.executing_contexts[id(context)]
            del self.executing_contexts[id(context)]
            has_finished = next(generator)
        else:
            method = self.DISPATCH_TABLE.get(opcode, _OpcodeDispatcher.unknown)
            has_finished = method(self, context)
            if hasattr(has_finished, "__next__"): # avoid using the types module
                # Handler is a generator: run it to its first yield.
                generator = has_finished
                has_finished = next(generator)
        if not has_finished:
            # Handler yielded False: park the generator until the child
            # context it pushed has been resolved.
            self.executing_contexts[id(context)] = generator
        return has_finished
|
|
|
|
|
|
def op_success(self, ctx):
|
|
|
# end of pattern
|
|
|
#self._log(ctx, "SUCCESS")
|
|
|
ctx.state.string_position = ctx.string_position
|
|
|
ctx.has_matched = True
|
|
|
return True
|
|
|
|
|
|
def op_failure(self, ctx):
|
|
|
# immediate failure
|
|
|
#self._log(ctx, "FAILURE")
|
|
|
ctx.has_matched = False
|
|
|
return True
|
|
|
|
|
|
def general_op_literal(self, ctx, compare, decorate=lambda x: x):
|
|
|
#print(ctx.peek_char())
|
|
|
if ctx.at_end() or not compare(decorate(ord(ctx.peek_char())),
|
|
|
decorate(ctx.peek_code(1))):
|
|
|
ctx.has_matched = False
|
|
|
ctx.skip_code(2)
|
|
|
ctx.skip_char(1)
|
|
|
|
|
|
    def op_literal(self, ctx):
        # match literal string
        # <LITERAL> <code>
        #self._log(ctx, "LITERAL", ctx.peek_code(1))
        self.general_op_literal(ctx, operator.eq)
        return True
|
|
|
|
|
|
    def op_not_literal(self, ctx):
        # match anything that is not the given literal character
        # <NOT_LITERAL> <code>
        #self._log(ctx, "NOT_LITERAL", ctx.peek_code(1))
        self.general_op_literal(ctx, operator.ne)
        return True
|
|
|
|
|
|
    def op_literal_ignore(self, ctx):
        # match literal regardless of case
        # <LITERAL_IGNORE> <code>
        #self._log(ctx, "LITERAL_IGNORE", ctx.peek_code(1))
        self.general_op_literal(ctx, operator.eq, ctx.state.lower)
        return True
|
|
|
|
|
|
    def op_not_literal_ignore(self, ctx):
        # match anything but the literal, regardless of case
        # <NOT_LITERAL_IGNORE> <code>
        #self._log(ctx, "NOT_LITERAL_IGNORE", ctx.peek_code(1))
        self.general_op_literal(ctx, operator.ne, ctx.state.lower)
        return True
|
|
|
|
|
|
    def op_at(self, ctx):
        # match at given position (zero-width assertion, e.g. ^, $, \b)
        # <AT> <code>
        #self._log(ctx, "AT", ctx.peek_code(1))
        if not self.at_dispatcher.dispatch(ctx.peek_code(1), ctx):
            ctx.has_matched = False
            return True
        ctx.skip_code(2)
        return True
|
|
|
|
|
|
    def op_category(self, ctx):
        # match at given category (e.g. \d, \w, \s)
        # <CATEGORY> <code>
        #self._log(ctx, "CATEGORY", ctx.peek_code(1))
        if ctx.at_end() or not self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
            ctx.has_matched = False
            return True
        ctx.skip_code(2)
        ctx.skip_char(1)
        return True
|
|
|
|
|
|
def op_any(self, ctx):
|
|
|
# match anything (except a newline)
|
|
|
# <ANY>
|
|
|
#self._log(ctx, "ANY")
|
|
|
if ctx.at_end() or ctx.at_linebreak():
|
|
|
ctx.has_matched = False
|
|
|
#print('_sre.py:line714, update context.has_matched variable')
|
|
|
return True
|
|
|
ctx.skip_code(1)
|
|
|
ctx.skip_char(1)
|
|
|
return True
|
|
|
|
|
|
def op_any_all(self, ctx):
|
|
|
# match anything
|
|
|
# <ANY_ALL>
|
|
|
#self._log(ctx, "ANY_ALL")
|
|
|
if ctx.at_end():
|
|
|
ctx.has_matched = False
|
|
|
#print('_sre.py:line725, update context.has_matched variable')
|
|
|
return True
|
|
|
ctx.skip_code(1)
|
|
|
ctx.skip_char(1)
|
|
|
return True
|
|
|
|
|
|
    def general_op_in(self, ctx, decorate=lambda x: x):
        """Shared body of IN/IN_IGNORE: test the current character (mapped
        through `decorate`) against the charset following the opcode."""
        if ctx.at_end():
            ctx.has_matched = False
            return
        skip = ctx.peek_code(1)
        ctx.skip_code(2) # set op pointer to the set code
        if not self.check_charset(ctx, decorate(ord(ctx.peek_char()))):
            ctx.has_matched = False
            return
        # Jump past the remainder of the charset and consume the character.
        ctx.skip_code(skip - 1)
        ctx.skip_char(1)
|
|
    def op_in(self, ctx):
        # match set member (or non_member)
        # <IN> <skip> <set>
        #self._log(ctx, "OP_IN")
        self.general_op_in(ctx)
        return True
|
|
|
|
|
|
    def op_in_ignore(self, ctx):
        # match set member (or non_member), disregarding case of current char
        # <IN_IGNORE> <skip> <set>
        #self._log(ctx, "OP_IN_IGNORE")
        self.general_op_in(ctx, ctx.state.lower)
        return True
|
|
|
|
|
|
def op_jump(self, ctx):
|
|
|
# jump forward
|
|
|
# <JUMP> <offset>
|
|
|
#self._log(ctx, "JUMP", ctx.peek_code(1))
|
|
|
ctx.skip_code(ctx.peek_code(1) + 1)
|
|
|
return True
|
|
|
|
|
|
    # skip info
    # <INFO> <skip>
    # The optimization info block is not interpreted at match time; it is
    # simply jumped over like a <JUMP>.
    op_info = op_jump
|
|
|
|
|
|
def op_mark(self, ctx):
|
|
|
# set mark
|
|
|
# <MARK> <gid>
|
|
|
#self._log(ctx, "OP_MARK", ctx.peek_code(1))
|
|
|
ctx.state.set_mark(ctx.peek_code(1), ctx.string_position)
|
|
|
ctx.skip_code(2)
|
|
|
return True
|
|
|
|
|
|
    def op_branch(self, ctx):
        # alternation
        # <BRANCH> <0=skip> code <JUMP> ... <NULL>
        # Generator handler: yields False to suspend while a child context
        # tries one alternative, True once this context is resolved.
        ctx.state.marks_push()
        ctx.skip_code(1)
        current_branch_length = ctx.peek_code(0)
        while current_branch_length:
            # The following tries to shortcut branches starting with a
            # (unmatched) literal. _sre.c also shortcuts charsets here.
            if not (ctx.peek_code(1) == OPCODES["literal"] and \
                    (ctx.at_end() or ctx.peek_code(2) != ord(ctx.peek_char()))):
                ctx.state.string_position = ctx.string_position
                child_context = ctx.push_new_context(1)
                yield False
                if child_context.has_matched:
                    ctx.has_matched = True
                    yield True
                # Alternative failed: restore marks and try the next one.
                ctx.state.marks_pop_keep()
            ctx.skip_code(current_branch_length)
            current_branch_length = ctx.peek_code(0)
        # All alternatives exhausted.
        ctx.state.marks_pop_discard()
        ctx.has_matched = False
        yield True
|
|
|
|
|
|
    def op_repeat_one(self, ctx):
        # match repeated sequence (maximizing).
        # this operator only works if the repeated item is exactly one character
        # wide, and we're not already collecting backtracking points.
        # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
        # Generator handler: yields False to suspend for a child context.
        mincount = ctx.peek_code(2)
        maxcount = ctx.peek_code(3)

        if ctx.remaining_chars() < mincount:
            # Not enough characters left to reach the minimum.
            ctx.has_matched = False
            yield True
        ctx.state.string_position = ctx.string_position
        # Greedily consume as many repetitions as possible.
        count = self.count_repetitions(ctx, maxcount)
        ctx.skip_char(count)
        if count < mincount:
            ctx.has_matched = False
            yield True
        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
            # tail is empty. we're finished
            ctx.state.string_position = ctx.string_position
            ctx.has_matched = True
            yield True

        ctx.state.marks_push()
        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["literal"]:
            # Special case: Tail starts with a literal. Skip positions where
            # the rest of the pattern cannot possibly match.
            char = ctx.peek_code(ctx.peek_code(1) + 2)
            while True:
                # Backtrack until the current char equals the tail literal.
                while count >= mincount and \
                        (ctx.at_end() or ord(ctx.peek_char()) != char):
                    ctx.skip_char(-1)
                    count -= 1
                if count < mincount:
                    break
                ctx.state.string_position = ctx.string_position
                child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
                yield False
                if child_context.has_matched:
                    ctx.has_matched = True
                    yield True
                # Tail failed here: give back one repetition and retry.
                ctx.skip_char(-1)
                count -= 1
                ctx.state.marks_pop_keep()
        else:
            # General case: backtracking
            while count >= mincount:
                ctx.state.string_position = ctx.string_position
                child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
                yield False
                if child_context.has_matched:
                    ctx.has_matched = True
                    yield True
                # Tail failed here: give back one repetition and retry.
                ctx.skip_char(-1)
                count -= 1
                ctx.state.marks_pop_keep()

        ctx.state.marks_pop_discard()
        ctx.has_matched = False
        #ctx.has_matched = True # <== this should be True (so match object gets returned to program)
        yield True
|
|
|
|
|
|
    def op_min_repeat_one(self, ctx):
        # match repeated sequence (minimizing)
        # <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
        # Generator handler: yields False to suspend for a child context.
        mincount = ctx.peek_code(2)
        maxcount = ctx.peek_code(3)

        if ctx.remaining_chars() < mincount:
            # Not enough characters left to reach the minimum.
            ctx.has_matched = False
            yield True
        ctx.state.string_position = ctx.string_position
        if mincount == 0:
            count = 0
        else:
            # Consume only the minimum number of repetitions up front.
            count = self.count_repetitions(ctx, mincount)
            if count < mincount:
                ctx.has_matched = False
                yield True
            ctx.skip_char(count)
        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
            # tail is empty. we're finished
            ctx.state.string_position = ctx.string_position
            ctx.has_matched = True
            yield True

        ctx.state.marks_push()
        while maxcount == MAXREPEAT or count <= maxcount:
            # Try the tail first; only on failure consume one more item.
            ctx.state.string_position = ctx.string_position
            child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
            yield False
            if child_context.has_matched:
                ctx.has_matched = True
                yield True
            ctx.state.string_position = ctx.string_position
            if self.count_repetitions(ctx, 1) == 0:
                # Item no longer matches: cannot grow the repetition.
                break
            ctx.skip_char(1)
            count += 1
            ctx.state.marks_pop_keep()

        ctx.state.marks_pop_discard()
        ctx.has_matched = False
        yield True
|
|
|
|
|
|
    def op_repeat(self, ctx):
        """General repeat: create a repeat context and run the repeated body.

        All the hard work is done by the UNTIL operators (MAX_UNTIL,
        MIN_UNTIL), which pick this context up via ``ctx.state.repeat``.

        Opcode layout: <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail
        """
        # Chain a new repeat context on top of any currently active one.
        repeat = _RepeatContext(ctx)
        ctx.state.repeat = repeat
        ctx.state.string_position = ctx.string_position
        child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
        yield False  # driver evaluates the repeated body (ends in UNTIL)
        # Restore the previously active repeat context and propagate the result.
        ctx.state.repeat = repeat.previous
        ctx.has_matched = child_context.has_matched
        yield True
|
|
|
|
|
|
    def op_max_until(self, ctx):
        """Maximizing (greedy) repeat tail.

        Opcode layout: <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail

        Requires the repeat context installed by op_repeat; operand counts
        are read through it (``repeat.peek_code``).
        """
        repeat = ctx.state.repeat
        if repeat is None:
            raise RuntimeError("Internal re error: MAX_UNTIL without REPEAT.")
        mincount = repeat.peek_code(2)
        maxcount = repeat.peek_code(3)
        ctx.state.string_position = ctx.string_position
        count = repeat.count + 1

        if count < mincount:
            # Not enough matches yet: the body must match again.
            repeat.count = count
            child_context = repeat.push_new_context(4)
            yield False
            ctx.has_matched = child_context.has_matched
            if not ctx.has_matched:
                # Roll back the failed attempt.
                repeat.count = count - 1
                ctx.state.string_position = ctx.string_position
            yield True

        if (count < maxcount or maxcount == MAXREPEAT) \
                and ctx.state.string_position != repeat.last_position:
            # We may have enough matches; if we can match another item, do so.
            # The last_position check guards against looping forever on a
            # zero-width body match.
            repeat.count = count
            ctx.state.marks_push()
            save_last_position = repeat.last_position  # zero-width match protection
            repeat.last_position = ctx.state.string_position
            child_context = repeat.push_new_context(4)
            yield False
            repeat.last_position = save_last_position
            if child_context.has_matched:
                ctx.state.marks_pop_discard()
                ctx.has_matched = True
                yield True
            # Body failed: restore marks and counters, fall through to tail.
            ctx.state.marks_pop()
            repeat.count = count - 1
            ctx.state.string_position = ctx.string_position

        # Cannot match more repeated items here. Make sure the tail matches.
        ctx.state.repeat = repeat.previous
        child_context = ctx.push_new_context(1)
        yield False
        ctx.has_matched = child_context.has_matched
        if not ctx.has_matched:
            # Tail failed too: reinstate this repeat for backtracking.
            ctx.state.repeat = repeat
            ctx.state.string_position = ctx.string_position
        yield True
|
|
|
|
|
|
    def op_min_until(self, ctx):
        """Minimizing (lazy) repeat tail.

        Opcode layout: <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail

        Requires the repeat context installed by op_repeat.
        """
        repeat = ctx.state.repeat
        if repeat is None:
            raise RuntimeError("Internal re error: MIN_UNTIL without REPEAT.")
        mincount = repeat.peek_code(2)
        maxcount = repeat.peek_code(3)
        ctx.state.string_position = ctx.string_position
        count = repeat.count + 1

        if count < mincount:
            # Not enough matches yet: the body must match again.
            repeat.count = count
            child_context = repeat.push_new_context(4)
            yield False
            ctx.has_matched = child_context.has_matched
            if not ctx.has_matched:
                repeat.count = count - 1
                ctx.state.string_position = ctx.string_position
            yield True

        # Lazy: try the tail first.
        ctx.state.marks_push()
        ctx.state.repeat = repeat.previous
        child_context = ctx.push_new_context(1)
        yield False
        if child_context.has_matched:
            ctx.has_matched = True
            yield True
        # Tail failed: reinstate this repeat and restore marks.
        ctx.state.repeat = repeat
        ctx.state.string_position = ctx.string_position
        ctx.state.marks_pop()

        # Match one more body repetition, then the tail will be retried.
        if count >= maxcount and maxcount != MAXREPEAT:
            ctx.has_matched = False
            yield True
        repeat.count = count
        child_context = repeat.push_new_context(4)
        yield False
        ctx.has_matched = child_context.has_matched
        if not ctx.has_matched:
            repeat.count = count - 1
            ctx.state.string_position = ctx.string_position
        yield True
|
|
|
|
|
|
def general_op_groupref(self, ctx, decorate=lambda x: x):
|
|
|
group_start, group_end = ctx.state.get_marks(ctx.peek_code(1))
|
|
|
if group_start is None or group_end is None or group_end < group_start:
|
|
|
ctx.has_matched = False
|
|
|
return True
|
|
|
while group_start < group_end:
|
|
|
if ctx.at_end() or decorate(ord(ctx.peek_char())) \
|
|
|
!= decorate(ord(ctx.state.string[group_start])):
|
|
|
ctx.has_matched = False
|
|
|
#print('_sre.py:line1042, update context.has_matched variable')
|
|
|
return True
|
|
|
group_start += 1
|
|
|
ctx.skip_char(1)
|
|
|
ctx.skip_code(2)
|
|
|
return True
|
|
|
|
|
|
def op_groupref(self, ctx):
|
|
|
# match backreference
|
|
|
# <GROUPREF> <zero-based group index>
|
|
|
#self._log(ctx, "GROUPREF", ctx.peek_code(1))
|
|
|
return self.general_op_groupref(ctx)
|
|
|
|
|
|
def op_groupref_ignore(self, ctx):
|
|
|
# match backreference case-insensitive
|
|
|
# <GROUPREF_IGNORE> <zero-based group index>
|
|
|
#self._log(ctx, "GROUPREF_IGNORE", ctx.peek_code(1))
|
|
|
return self.general_op_groupref(ctx, ctx.state.lower)
|
|
|
|
|
|
def op_groupref_exists(self, ctx):
|
|
|
# <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ...
|
|
|
#self._log(ctx, "GROUPREF_EXISTS", ctx.peek_code(1))
|
|
|
group_start, group_end = ctx.state.get_marks(ctx.peek_code(1))
|
|
|
if group_start is None or group_end is None or group_end < group_start:
|
|
|
ctx.skip_code(ctx.peek_code(2) + 1)
|
|
|
else:
|
|
|
ctx.skip_code(3)
|
|
|
return True
|
|
|
|
|
|
    def op_assert(self, ctx):
        """Positive zero-width assertion (lookahead/lookbehind subpattern).

        Opcode layout: <ASSERT> <skip> <back> <pattern>
        <back> is how many characters to step back before matching
        <pattern> (0 for a lookahead).
        """
        # Rewind by <back>; running off the front of the string fails.
        ctx.state.string_position = ctx.string_position - ctx.peek_code(2)
        if ctx.state.string_position < 0:
            ctx.has_matched = False
            yield True
        child_context = ctx.push_new_context(3)
        yield False  # driver evaluates the assertion subpattern
        if child_context.has_matched:
            # Zero-width success: skip the subpattern, keep the position.
            ctx.skip_code(ctx.peek_code(1) + 1)
        else:
            ctx.has_matched = False
        yield True
|
|
|
|
|
|
    def op_assert_not(self, ctx):
        """Negative zero-width assertion (lookahead/lookbehind subpattern).

        Opcode layout: <ASSERT_NOT> <skip> <back> <pattern>
        Succeeds when <pattern> does NOT match. Rewinding past the start of
        the string counts as a non-match, i.e. the assertion succeeds.
        """
        ctx.state.string_position = ctx.string_position - ctx.peek_code(2)
        if ctx.state.string_position >= 0:
            child_context = ctx.push_new_context(3)
            yield False  # driver evaluates the assertion subpattern
            if child_context.has_matched:
                # Subpattern matched, so the negative assertion fails.
                ctx.has_matched = False
                yield True
        # Zero-width success: skip the subpattern, keep the position.
        ctx.skip_code(ctx.peek_code(1) + 1)
        yield True
|
|
|
|
|
|
def unknown(self, ctx):
|
|
|
#self._log(ctx, "UNKNOWN", ctx.peek_code())
|
|
|
raise RuntimeError("Internal re error. Unknown opcode: %s" % ctx.peek_code())
|
|
|
|
|
|
def check_charset(self, ctx, char):
|
|
|
"""Checks whether a character matches set of arbitrary length. Assumes
|
|
|
the code pointer is at the first member of the set."""
|
|
|
self.set_dispatcher.reset(char)
|
|
|
save_position = ctx.code_position
|
|
|
result = None
|
|
|
while result is None:
|
|
|
result = self.set_dispatcher.dispatch(ctx.peek_code(), ctx)
|
|
|
ctx.code_position = save_position
|
|
|
#print("_sre.py:1123:check_charset", result)
|
|
|
return result
|
|
|
|
|
|
def count_repetitions(self, ctx, maxcount):
|
|
|
"""Returns the number of repetitions of a single item, starting from the
|
|
|
current string position. The code pointer is expected to point to a
|
|
|
REPEAT_ONE operation (with the repeated 4 ahead)."""
|
|
|
count = 0
|
|
|
real_maxcount = ctx.state.end - ctx.string_position
|
|
|
if maxcount < real_maxcount and maxcount != MAXREPEAT:
|
|
|
real_maxcount = maxcount
|
|
|
# XXX could special case every single character pattern here, as in C.
|
|
|
# This is a general solution, a bit hackisch, but works and should be
|
|
|
# efficient.
|
|
|
code_position = ctx.code_position
|
|
|
string_position = ctx.string_position
|
|
|
ctx.skip_code(4)
|
|
|
reset_position = ctx.code_position
|
|
|
while count < real_maxcount:
|
|
|
# this works because the single character pattern is followed by
|
|
|
# a success opcode
|
|
|
ctx.code_position = reset_position
|
|
|
self.dispatch(ctx.peek_code(), ctx)
|
|
|
#print("count_repetitions", ctx.has_matched, count)
|
|
|
if ctx.has_matched is False: # could be None as well
|
|
|
break
|
|
|
count += 1
|
|
|
ctx.has_matched = None
|
|
|
ctx.code_position = code_position
|
|
|
ctx.string_position = string_position
|
|
|
return count
|
|
|
|
|
|
def _log(self, context, opname, *args):
|
|
|
arg_string = ("%s " * len(args)) % args
|
|
|
_log("|%s|%s|%s %s" % (context.pattern_codes,
|
|
|
context.string_position, opname, arg_string))
|
|
|
|
|
|
# Populate _OpcodeDispatcher's dispatch table from its op_* methods.
_OpcodeDispatcher.build_dispatch_table(OPCODES, "op_")
|
|
|
|
|
|
|
|
|
class _CharsetDispatcher(_Dispatcher):
    """Dispatches the opcodes that make up a character set (<LITERAL>,
    <CATEGORY>, <CHARSET>, <RANGE>, <NEGATE>, ...).

    A handler returns a final verdict (``self.ok`` / ``not self.ok``) when
    it can decide, or None after skipping its operands so the scan moves on
    to the next set member.
    """

    def __init__(self):
        self.ch_dispatcher = _ChcodeDispatcher()

    def reset(self, char):
        # char: the character code being tested against the set.
        # ok: polarity of a positive hit; flipped by <NEGATE>.
        self.char = char
        self.ok = True

    def set_failure(self, ctx):
        # End of set reached without a hit.
        return not self.ok
    def set_literal(self, ctx):
        # <LITERAL> <code>
        if ctx.peek_code(1) == self.char:
            return self.ok
        else:
            ctx.skip_code(2)
    def set_category(self, ctx):
        # <CATEGORY> <code>
        if self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
            return self.ok
        else:
            ctx.skip_code(2)
    def set_charset(self, ctx):
        # <CHARSET> <bitmap> (16 bits per code word)
        char_code = self.char
        ctx.skip_code(1)  # point to beginning of bitmap
        if CODESIZE == 2:
            if char_code < 256 and ctx.peek_code(char_code >> 4) \
                    & (1 << (char_code & 15)):
                return self.ok
            ctx.skip_code(16)  # skip bitmap
        else:
            if char_code < 256 and ctx.peek_code(char_code >> 5) \
                    & (1 << (char_code & 31)):
                return self.ok
            ctx.skip_code(8)  # skip bitmap
    def set_range(self, ctx):
        # <RANGE> <lower> <upper>
        if ctx.peek_code(1) <= self.char <= ctx.peek_code(2):
            return self.ok
        ctx.skip_code(3)
    def set_negate(self, ctx):
        self.ok = not self.ok
        ctx.skip_code(1)

    #fixme brython. array module doesn't exist
    def set_bigcharset(self, ctx):
        # BUG FIX: the original raised the nonexistent name
        # "NotImplementationError", which would have surfaced as a NameError
        # instead of the intended NotImplementedError.
        raise NotImplementedError("_sre.py: set_bigcharset, array not implemented")
        # <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
        # NOTE: the reference implementation below is unreachable; it needs
        # the ``array`` module, which is unavailable under Brython.
        char_code = self.char
        count = ctx.peek_code(1)
        ctx.skip_code(2)
        if char_code < 65536:
            block_index = char_code >> 8
            # NB: there are CODESIZE block indices per bytecode
            a = array.array("B")
            a.fromstring(array.array(CODESIZE == 2 and "H" or "I",
                    [ctx.peek_code(block_index // CODESIZE)]).tostring())
            block = a[block_index % CODESIZE]
            ctx.skip_code(256 // CODESIZE)  # skip block indices
            block_value = ctx.peek_code(block * (32 // CODESIZE)
                    + ((char_code & 255) >> (CODESIZE == 2 and 4 or 5)))
            if block_value & (1 << (char_code & ((8 * CODESIZE) - 1))):
                return self.ok
        else:
            ctx.skip_code(256 // CODESIZE)  # skip block indices
        ctx.skip_code(count * (32 // CODESIZE))  # skip blocks

    def unknown(self, ctx):
        # Unrecognized set opcode: no match.
        return False
|
|
|
|
|
|
# Populate _CharsetDispatcher's dispatch table from its set_* methods.
_CharsetDispatcher.build_dispatch_table(OPCODES, "set_")
|
|
|
|
|
|
|
|
|
class _AtcodeDispatcher(_Dispatcher):
    """Dispatches AT codes: zero-width position assertions such as ^, $,
    \\A, \\Z, \\b and \\B.  Each handler returns True when the current
    position in ctx satisfies the assertion."""

    def at_beginning(self, ctx):
        # Start of the string.
        return ctx.at_beginning()
    at_beginning_string = at_beginning
    def at_beginning_line(self, ctx):
        # Start of the string, or position right after a linebreak.
        return ctx.at_beginning() or _is_linebreak(ctx.peek_char(-1))
    def at_end(self, ctx):
        # End of the string, or just before a single trailing linebreak.
        return (ctx.remaining_chars() == 1 and ctx.at_linebreak()) or ctx.at_end()
    def at_end_line(self, ctx):
        # Before any linebreak, or at the end of the string.
        return ctx.at_linebreak() or ctx.at_end()
    def at_end_string(self, ctx):
        # End of the string only (no linebreak allowance).
        return ctx.at_end()
    def at_boundary(self, ctx):
        # Word boundary using the ASCII word-character table.
        return ctx.at_boundary(_is_word)
    def at_non_boundary(self, ctx):
        return not ctx.at_boundary(_is_word)
    def at_loc_boundary(self, ctx):
        # Word boundary using the locale word test (8-bit alnum or '_').
        return ctx.at_boundary(_is_loc_word)
    def at_loc_non_boundary(self, ctx):
        return not ctx.at_boundary(_is_loc_word)
    def at_uni_boundary(self, ctx):
        # Word boundary using the Unicode word test.
        return ctx.at_boundary(_is_uni_word)
    def at_uni_non_boundary(self, ctx):
        return not ctx.at_boundary(_is_uni_word)
    def unknown(self, ctx):
        # Unrecognized AT code: assertion never holds.
        return False
|
|
|
|
|
|
# Populate _AtcodeDispatcher's dispatch table (handlers named after AT codes).
_AtcodeDispatcher.build_dispatch_table(ATCODES, "")
|
|
|
|
|
|
|
|
|
class _ChcodeDispatcher(_Dispatcher):
    """Dispatches CH codes: single-character category tests such as \\d,
    \\s and \\w, plus their locale and unicode variants.  Each handler
    tests the character at the context's current position."""

    def category_digit(self, ctx):
        return _is_digit(ctx.peek_char())
    def category_not_digit(self, ctx):
        return not _is_digit(ctx.peek_char())
    def category_space(self, ctx):
        return _is_space(ctx.peek_char())
    def category_not_space(self, ctx):
        return not _is_space(ctx.peek_char())
    def category_word(self, ctx):
        return _is_word(ctx.peek_char())
    def category_not_word(self, ctx):
        return not _is_word(ctx.peek_char())
    def category_linebreak(self, ctx):
        return _is_linebreak(ctx.peek_char())
    def category_not_linebreak(self, ctx):
        return not _is_linebreak(ctx.peek_char())
    def category_loc_word(self, ctx):
        return _is_loc_word(ctx.peek_char())
    def category_loc_not_word(self, ctx):
        return not _is_loc_word(ctx.peek_char())
    def category_uni_digit(self, ctx):
        # Unicode-aware digit test via str.isdigit.
        return ctx.peek_char().isdigit()
    def category_uni_not_digit(self, ctx):
        return not ctx.peek_char().isdigit()
    def category_uni_space(self, ctx):
        # Unicode-aware whitespace test via str.isspace.
        return ctx.peek_char().isspace()
    def category_uni_not_space(self, ctx):
        return not ctx.peek_char().isspace()
    def category_uni_word(self, ctx):
        return _is_uni_word(ctx.peek_char())
    def category_uni_not_word(self, ctx):
        return not _is_uni_word(ctx.peek_char())
    def category_uni_linebreak(self, ctx):
        # Unicode linebreaks come from the static _uni_linebreaks table.
        return ord(ctx.peek_char()) in _uni_linebreaks
    def category_uni_not_linebreak(self, ctx):
        return ord(ctx.peek_char()) not in _uni_linebreaks
    def unknown(self, ctx):
        # Unrecognized CH code: no match.
        return False
|
|
|
|
|
|
# Populate _ChcodeDispatcher's dispatch table (handlers named after CH codes).
_ChcodeDispatcher.build_dispatch_table(CHCODES, "")
|
|
|
|
|
|
|
|
|
# Per-character classification bitmasks for ASCII codes 0-127.
# Bits tested in this file: 1 = digit (_is_digit), 2 = whitespace
# (_is_space), 16 = word character (_is_word).  The other bits set in the
# table (4, 8) presumably mirror _sre.c's linebreak/alnum flags but are
# not consulted here -- confirm against _sre.c before relying on them.
_ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
    0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 ]
|
|
|
|
|
|
def _is_digit(char):
    """Truthy iff *char* is an ASCII decimal digit (bit 1 of the table)."""
    code = ord(char)
    if code >= 128:
        return False
    return _ascii_char_info[code] & 1
|
|
|
|
|
|
def _is_space(char):
    """Truthy iff *char* is ASCII whitespace (bit 2 of the table)."""
    code = ord(char)
    if code >= 128:
        return False
    return _ascii_char_info[code] & 2
|
|
|
|
|
|
def _is_word(char):
    """Truthy iff *char* is an ASCII word character (bit 16 of the table).

    NB: non-ASCII characters are never words, matching _sre.c.
    """
    code = ord(char)
    if code >= 128:
        return False
    return _ascii_char_info[code] & 16
|
|
|
|
|
|
def _is_loc_word(char):
|
|
|
return (not (ord(char) & ~255) and char.isalnum()) or char == '_'
|
|
|
|
|
|
def _is_uni_word(char):
|
|
|
# not valid in python 3
|
|
|
#return unichr(ord(char)).isalnum() or char == '_'
|
|
|
return chr(ord(char)).isalnum() or char == '_'
|
|
|
|
|
|
def _is_linebreak(char):
|
|
|
return char == "\n"
|
|
|
|
|
|
# Static list of all unicode codepoints reported by Py_UNICODE_ISLINEBREAK:
# LF, CR, FS, GS, RS, NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR.
_uni_linebreaks = [10, 13, 28, 29, 30, 133, 8232, 8233]
|
|
|
|
|
|
def _log(message):
|
|
|
if 0:
|
|
|
print(message)
|
|
|
|