Show More
Commit Description:
Merge pull request #17 from nattee/master...
Commit Description:
Merge pull request #17 from nattee/master upgrade to current working snapshot
References:
File last commit:
Show/Diff file:
Action:
lib/assets/Lib/_sre.py | 1354 lines | 51.5 KiB | text/x-python | PythonLexer |
# NOT_RPYTHON
"""
A pure Python reimplementation of the _sre module from CPython 2.4
Copyright 2005 Nik Haldimann, licensed under the MIT license
This code is based on material licensed under CNRI's Python 1.6 license and
copyrighted by: Copyright (c) 1997-2001 by Secret Labs AB
"""
MAXREPEAT = 2147483648
#import array
import operator, sys
from sre_constants import ATCODES, OPCODES, CHCODES
from sre_constants import SRE_INFO_PREFIX, SRE_INFO_LITERAL
from sre_constants import SRE_FLAG_UNICODE, SRE_FLAG_LOCALE
import sys
# Identifying as _sre from Python 2.3 or 2.4
#if sys.version_info[:2] >= (2, 4):
MAGIC = 20031017
#else:
# MAGIC = 20030419
# In _sre.c this is bytesize of the code word type of the C implementation.
# There it's 2 for normal Python builds and more for wide unicode builds (large
# enough to hold a 32-bit UCS-4 encoded character). Since here in pure Python
# we only see re bytecodes as Python longs, we shouldn't have to care about the
# codesize. But sre_compile will compile some stuff differently depending on the
# codesize (e.g., charsets).
# starting with python 3.3 CODESIZE is 4
#if sys.maxunicode == 65535:
# CODESIZE = 2
#else:
CODESIZE = 4
copyright = "_sre.py 2.4c Copyright 2005 by Nik Haldimann"
def getcodesize():
return CODESIZE
def compile(pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
"""Compiles (or rather just converts) a pattern descriptor to a SRE_Pattern
object. Actual compilation to opcodes happens in sre_compile."""
return SRE_Pattern(pattern, flags, code, groups, groupindex, indexgroup)
def getlower(char_ord, flags):
if (char_ord < 128) or (flags & SRE_FLAG_UNICODE) \
or (flags & SRE_FLAG_LOCALE and char_ord < 256):
#return ord(unichr(char_ord).lower())
return ord(chr(char_ord).lower())
else:
return char_ord
class SRE_Pattern:
def __init__(self, pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
self.pattern = pattern
self.flags = flags
self.groups = groups
self.groupindex = groupindex # Maps group names to group indices
self._indexgroup = indexgroup # Maps indices to group names
self._code = code
def match(self, string, pos=0, endpos=sys.maxsize):
"""If zero or more characters at the beginning of string match this
regular expression, return a corresponding MatchObject instance. Return
None if the string does not match the pattern."""
state = _State(string, pos, endpos, self.flags)
if state.match(self._code):
return SRE_Match(self, state)
return None
def search(self, string, pos=0, endpos=sys.maxsize):
"""Scan through string looking for a location where this regular
expression produces a match, and return a corresponding MatchObject
instance. Return None if no position in the string matches the
pattern."""
state = _State(string, pos, endpos, self.flags)
if state.search(self._code):
return SRE_Match(self, state)
else:
return None
def findall(self, string, pos=0, endpos=sys.maxsize):
"""Return a list of all non-overlapping matches of pattern in string."""
matchlist = []
state = _State(string, pos, endpos, self.flags)
while state.start <= state.end:
state.reset()
state.string_position = state.start
if not state.search(self._code):
break
match = SRE_Match(self, state)
if self.groups == 0 or self.groups == 1:
item = match.group(self.groups)
else:
item = match.groups("")
matchlist.append(item)
if state.string_position == state.start:
state.start += 1
else:
state.start = state.string_position
return matchlist
def _subx(self, template, string, count=0, subn=False):
filter = template
if not callable(template) and "\\" in template:
# handle non-literal strings ; hand it over to the template compiler
#import sre #sre was renamed to re
#fix me brython
#print("possible issue at _sre.py line 116")
import re as sre
filter = sre._subx(self, template)
state = _State(string, 0, sys.maxsize, self.flags)
sublist = []
n = last_pos = 0
while not count or n < count:
state.reset()
state.string_position = state.start
if not state.search(self._code):
break
if last_pos < state.start:
sublist.append(string[last_pos:state.start])
if not (last_pos == state.start and
last_pos == state.string_position and n > 0):
# the above ignores empty matches on latest position
if callable(filter):
sublist.append(filter(SRE_Match(self, state)))
else:
sublist.append(filter)
last_pos = state.string_position
n += 1
if state.string_position == state.start:
state.start += 1
else:
state.start = state.string_position
if last_pos < state.end:
sublist.append(string[last_pos:state.end])
item = "".join(sublist)
if subn:
return item, n
else:
return item
def sub(self, repl, string, count=0):
"""Return the string obtained by replacing the leftmost non-overlapping
occurrences of pattern in string by the replacement repl."""
return self._subx(repl, string, count, False)
def subn(self, repl, string, count=0):
"""Return the tuple (new_string, number_of_subs_made) found by replacing
the leftmost non-overlapping occurrences of pattern with the replacement
repl."""
return self._subx(repl, string, count, True)
def split(self, string, maxsplit=0):
"""Split string by the occurrences of pattern."""
splitlist = []
state = _State(string, 0, sys.maxsize, self.flags)
n = 0
last = state.start
while not maxsplit or n < maxsplit:
state.reset()
state.string_position = state.start
if not state.search(self._code):
break
if state.start == state.string_position: # zero-width match
if last == state.end: # or end of string
break
state.start += 1
continue
splitlist.append(string[last:state.start])
# add groups (if any)
if self.groups:
match = SRE_Match(self, state)
splitlist.extend(list(match.groups(None)))
n += 1
last = state.start = state.string_position
splitlist.append(string[last:state.end])
return splitlist
def finditer(self, string, pos=0, endpos=sys.maxsize):
"""Return a list of all non-overlapping matches of pattern in string."""
#scanner = self.scanner(string, pos, endpos)
_list=[]
_m=self.scanner(string, pos, endpos)
_re=SRE_Scanner(self, string, pos, endpos)
_m=_re.search()
while _m:
_list.append(_m)
_m=_re.search()
return _list
#return iter(scanner.search, None)
def scanner(self, string, start=0, end=sys.maxsize):
return SRE_Scanner(self, string, start, end)
def __copy__(self):
raise TypeError("cannot copy this pattern object")
def __deepcopy__(self):
raise TypeError("cannot copy this pattern object")
class SRE_Scanner:
"""Undocumented scanner interface of sre."""
def __init__(self, pattern, string, start, end):
self.pattern = pattern
self._state = _State(string, start, end, self.pattern.flags)
def _match_search(self, matcher):
state = self._state
state.reset()
state.string_position = state.start
match = None
if matcher(self.pattern._code):
match = SRE_Match(self.pattern, state)
if match is None or state.string_position == state.start:
state.start += 1
else:
state.start = state.string_position
return match
def match(self):
return self._match_search(self._state.match)
def search(self):
return self._match_search(self._state.search)
class SRE_Match:
def __init__(self, pattern, state):
self.re = pattern
self.string = state.string
self.pos = state.pos
self.endpos = state.end
self.lastindex = state.lastindex
if self.lastindex < 0:
self.lastindex = None
self.regs = self._create_regs(state)
#statement below is not valid under python3 ( 0 <= None)
#if pattern._indexgroup and 0 <= self.lastindex < len(pattern._indexgroup):
if self.lastindex is not None and pattern._indexgroup and 0 <= self.lastindex < len(pattern._indexgroup):
# The above upper-bound check should not be necessary, as the re
# compiler is supposed to always provide an _indexgroup list long
# enough. But the re.Scanner class seems to screw up something
# there, test_scanner in test_re won't work without upper-bound
# checking. XXX investigate this and report bug to CPython.
self.lastgroup = pattern._indexgroup[self.lastindex]
else:
self.lastgroup = None
def _create_regs(self, state):
"""Creates a tuple of index pairs representing matched groups."""
regs = [(state.start, state.string_position)]
for group in range(self.re.groups):
mark_index = 2 * group
if mark_index + 1 < len(state.marks) \
and state.marks[mark_index] is not None \
and state.marks[mark_index + 1] is not None:
regs.append((state.marks[mark_index], state.marks[mark_index + 1]))
else:
regs.append((-1, -1))
return tuple(regs)
def _get_index(self, group):
if isinstance(group, int):
if group >= 0 and group <= self.re.groups:
return group
else:
if group in self.re.groupindex:
return self.re.groupindex[group]
raise IndexError("no such group")
def _get_slice(self, group, default):
group_indices = self.regs[group]
if group_indices[0] >= 0:
return self.string[group_indices[0]:group_indices[1]]
else:
return default
def start(self, group=0):
"""Returns the indices of the start of the substring matched by group;
group defaults to zero (meaning the whole matched substring). Returns -1
if group exists but did not contribute to the match."""
return self.regs[self._get_index(group)][0]
def end(self, group=0):
"""Returns the indices of the end of the substring matched by group;
group defaults to zero (meaning the whole matched substring). Returns -1
if group exists but did not contribute to the match."""
return self.regs[self._get_index(group)][1]
def span(self, group=0):
"""Returns the 2-tuple (m.start(group), m.end(group))."""
return self.start(group), self.end(group)
def expand(self, template):
"""Return the string obtained by doing backslash substitution and
resolving group references on template."""
import sre
return sre._expand(self.re, self, template)
def groups(self, default=None):
"""Returns a tuple containing all the subgroups of the match. The
default argument is used for groups that did not participate in the
match (defaults to None)."""
groups = []
for indices in self.regs[1:]:
if indices[0] >= 0:
groups.append(self.string[indices[0]:indices[1]])
else:
groups.append(default)
return tuple(groups)
def groupdict(self, default=None):
"""Return a dictionary containing all the named subgroups of the match.
The default argument is used for groups that did not participate in the
match (defaults to None)."""
groupdict = {}
for key, value in self.re.groupindex.items():
groupdict[key] = self._get_slice(value, default)
return groupdict
def group(self, *args):
"""Returns one or more subgroups of the match. Each argument is either a
group index or a group name."""
if len(args) == 0:
args = (0,)
grouplist = []
for group in args:
grouplist.append(self._get_slice(self._get_index(group), None))
if len(grouplist) == 1:
return grouplist[0]
else:
return tuple(grouplist)
def __copy__():
raise TypeError("cannot copy this pattern object")
def __deepcopy__():
raise TypeError("cannot copy this pattern object")
class _State:
def __init__(self, string, start, end, flags):
self.string = string
if start < 0:
start = 0
if end > len(string):
end = len(string)
self.start = start
self.string_position = self.start
self.end = end
self.pos = start
self.flags = flags
self.reset()
def reset(self):
self.marks = []
self.lastindex = -1
self.marks_stack = []
self.context_stack = []
self.repeat = None
def match(self, pattern_codes):
# Optimization: Check string length. pattern_codes[3] contains the
# minimum length for a string to possibly match.
# brython.. the optimization doesn't work
#if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
# if self.end - self.string_position < pattern_codes[3]:
# #_log("reject (got %d chars, need %d)"
# # % (self.end - self.string_position, pattern_codes[3]))
# return False
dispatcher = _OpcodeDispatcher()
self.context_stack.append(_MatchContext(self, pattern_codes))
has_matched = None
while len(self.context_stack) > 0:
context = self.context_stack[-1]
has_matched = dispatcher.match(context)
if has_matched is not None: # don't pop if context isn't done
self.context_stack.pop()
return has_matched
def search(self, pattern_codes):
flags = 0
if pattern_codes[0] == OPCODES["info"]:
# optimization info block
# <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
return self.fast_search(pattern_codes)
flags = pattern_codes[2]
pattern_codes = pattern_codes[pattern_codes[1] + 1:]
string_position = self.start
if pattern_codes[0] == OPCODES["literal"]:
# Special case: Pattern starts with a literal character. This is
# used for short prefixes
character = pattern_codes[1]
while True:
while string_position < self.end \
and ord(self.string[string_position]) != character:
string_position += 1
if string_position >= self.end:
return False
self.start = string_position
string_position += 1
self.string_position = string_position
if flags & SRE_INFO_LITERAL:
return True
if self.match(pattern_codes[2:]):
return True
return False
# General case
while string_position <= self.end:
self.reset()
self.start = self.string_position = string_position
if self.match(pattern_codes):
return True
string_position += 1
return False
def fast_search(self, pattern_codes):
"""Skips forward in a string as fast as possible using information from
an optimization info block."""
# pattern starts with a known prefix
# <5=length> <6=skip> <7=prefix data> <overlap data>
flags = pattern_codes[2]
prefix_len = pattern_codes[5]
prefix_skip = pattern_codes[6] # don't really know what this is good for
prefix = pattern_codes[7:7 + prefix_len]
overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
pattern_codes = pattern_codes[pattern_codes[1] + 1:]
i = 0
string_position = self.string_position
while string_position < self.end:
while True:
if ord(self.string[string_position]) != prefix[i]:
if i == 0:
break
else:
i = overlap[i]
else:
i += 1
if i == prefix_len:
# found a potential match
self.start = string_position + 1 - prefix_len
self.string_position = string_position + 1 \
- prefix_len + prefix_skip
if flags & SRE_INFO_LITERAL:
return True # matched all of pure literal pattern
if self.match(pattern_codes[2 * prefix_skip:]):
return True
i = overlap[i]
break
string_position += 1
return False
def set_mark(self, mark_nr, position):
if mark_nr & 1:
# This id marks the end of a group.
# fix python 3 division incompatability
#self.lastindex = mark_nr / 2 + 1
self.lastindex = mark_nr // 2 + 1
if mark_nr >= len(self.marks):
self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
self.marks[mark_nr] = position
def get_marks(self, group_index):
marks_index = 2 * group_index
if len(self.marks) > marks_index + 1:
return self.marks[marks_index], self.marks[marks_index + 1]
else:
return None, None
def marks_push(self):
self.marks_stack.append((self.marks[:], self.lastindex))
def marks_pop(self):
self.marks, self.lastindex = self.marks_stack.pop()
def marks_pop_keep(self):
self.marks, self.lastindex = self.marks_stack[-1]
def marks_pop_discard(self):
self.marks_stack.pop()
def lower(self, char_ord):
return getlower(char_ord, self.flags)
class _MatchContext:
def __init__(self, state, pattern_codes):
self.state = state
self.pattern_codes = pattern_codes
self.string_position = state.string_position
self.code_position = 0
self.has_matched = None
def push_new_context(self, pattern_offset):
"""Creates a new child context of this context and pushes it on the
stack. pattern_offset is the offset off the current code position to
start interpreting from."""
child_context = _MatchContext(self.state,
self.pattern_codes[self.code_position + pattern_offset:])
#print("_sre.py:517:pushing new context") #, child_context.has_matched)
#print(self.state.string_position)
#print(self.pattern_codes[self.code_position + pattern_offset:])
#print(pattern_offset)
self.state.context_stack.append(child_context)
return child_context
def peek_char(self, peek=0):
return self.state.string[self.string_position + peek]
def skip_char(self, skip_count):
self.string_position += skip_count
def remaining_chars(self):
return self.state.end - self.string_position
def peek_code(self, peek=0):
return self.pattern_codes[self.code_position + peek]
def skip_code(self, skip_count):
self.code_position += skip_count
def remaining_codes(self):
return len(self.pattern_codes) - self.code_position
def at_beginning(self):
return self.string_position == 0
def at_end(self):
return self.string_position == self.state.end
def at_linebreak(self):
return not self.at_end() and _is_linebreak(self.peek_char())
def at_boundary(self, word_checker):
if self.at_beginning() and self.at_end():
return False
that = not self.at_beginning() and word_checker(self.peek_char(-1))
this = not self.at_end() and word_checker(self.peek_char())
return this != that
class _RepeatContext(_MatchContext):
def __init__(self, context):
_MatchContext.__init__(self, context.state,
context.pattern_codes[context.code_position:])
self.count = -1
#print('569:repeat', context.state.repeat)
self.previous = context.state.repeat
self.last_position = None
class _Dispatcher:
DISPATCH_TABLE = None
def dispatch(self, code, context):
method = self.DISPATCH_TABLE.get(code, self.__class__.unknown)
return method(self, context)
def unknown(self, code, ctx):
raise NotImplementedError()
def build_dispatch_table(cls, code_dict, method_prefix):
if cls.DISPATCH_TABLE is not None:
return
table = {}
for key, value in code_dict.items():
if hasattr(cls, "%s%s" % (method_prefix, key)):
table[value] = getattr(cls, "%s%s" % (method_prefix, key))
cls.DISPATCH_TABLE = table
build_dispatch_table = classmethod(build_dispatch_table)
class _OpcodeDispatcher(_Dispatcher):
def __init__(self):
self.executing_contexts = {}
self.at_dispatcher = _AtcodeDispatcher()
self.ch_dispatcher = _ChcodeDispatcher()
self.set_dispatcher = _CharsetDispatcher()
def match(self, context):
"""Returns True if the current context matches, False if it doesn't and
None if matching is not finished, ie must be resumed after child
contexts have been matched."""
while context.remaining_codes() > 0 and context.has_matched is None:
opcode = context.peek_code()
if not self.dispatch(opcode, context):
return None
if context.has_matched is None:
context.has_matched = False
return context.has_matched
def dispatch(self, opcode, context):
"""Dispatches a context on a given opcode. Returns True if the context
is done matching, False if it must be resumed when next encountered."""
#if self.executing_contexts.has_key(id(context)):
if id(context) in self.executing_contexts:
generator = self.executing_contexts[id(context)]
del self.executing_contexts[id(context)]
has_finished = next(generator)
else:
method = self.DISPATCH_TABLE.get(opcode, _OpcodeDispatcher.unknown)
has_finished = method(self, context)
if hasattr(has_finished, "__next__"): # avoid using the types module
generator = has_finished
has_finished = next(generator)
if not has_finished:
self.executing_contexts[id(context)] = generator
return has_finished
def op_success(self, ctx):
# end of pattern
#self._log(ctx, "SUCCESS")
ctx.state.string_position = ctx.string_position
ctx.has_matched = True
return True
def op_failure(self, ctx):
# immediate failure
#self._log(ctx, "FAILURE")
ctx.has_matched = False
return True
def general_op_literal(self, ctx, compare, decorate=lambda x: x):
#print(ctx.peek_char())
if ctx.at_end() or not compare(decorate(ord(ctx.peek_char())),
decorate(ctx.peek_code(1))):
ctx.has_matched = False
ctx.skip_code(2)
ctx.skip_char(1)
def op_literal(self, ctx):
# match literal string
# <LITERAL> <code>
#self._log(ctx, "LITERAL", ctx.peek_code(1))
self.general_op_literal(ctx, operator.eq)
return True
def op_not_literal(self, ctx):
# match anything that is not the given literal character
# <NOT_LITERAL> <code>
#self._log(ctx, "NOT_LITERAL", ctx.peek_code(1))
self.general_op_literal(ctx, operator.ne)
return True
def op_literal_ignore(self, ctx):
# match literal regardless of case
# <LITERAL_IGNORE> <code>
#self._log(ctx, "LITERAL_IGNORE", ctx.peek_code(1))
self.general_op_literal(ctx, operator.eq, ctx.state.lower)
return True
def op_not_literal_ignore(self, ctx):
# match literal regardless of case
# <LITERAL_IGNORE> <code>
#self._log(ctx, "LITERAL_IGNORE", ctx.peek_code(1))
self.general_op_literal(ctx, operator.ne, ctx.state.lower)
return True
def op_at(self, ctx):
# match at given position
# <AT> <code>
#self._log(ctx, "AT", ctx.peek_code(1))
if not self.at_dispatcher.dispatch(ctx.peek_code(1), ctx):
ctx.has_matched = False
#print('_sre.py:line693, update context.has_matched variable')
return True
ctx.skip_code(2)
return True
def op_category(self, ctx):
# match at given category
# <CATEGORY> <code>
#self._log(ctx, "CATEGORY", ctx.peek_code(1))
if ctx.at_end() or not self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
ctx.has_matched = False
#print('_sre.py:line703, update context.has_matched variable')
return True
ctx.skip_code(2)
ctx.skip_char(1)
return True
def op_any(self, ctx):
# match anything (except a newline)
# <ANY>
#self._log(ctx, "ANY")
if ctx.at_end() or ctx.at_linebreak():
ctx.has_matched = False
#print('_sre.py:line714, update context.has_matched variable')
return True
ctx.skip_code(1)
ctx.skip_char(1)
return True
def op_any_all(self, ctx):
# match anything
# <ANY_ALL>
#self._log(ctx, "ANY_ALL")
if ctx.at_end():
ctx.has_matched = False
#print('_sre.py:line725, update context.has_matched variable')
return True
ctx.skip_code(1)
ctx.skip_char(1)
return True
def general_op_in(self, ctx, decorate=lambda x: x):
#self._log(ctx, "OP_IN")
#print('general_op_in')
if ctx.at_end():
ctx.has_matched = False
#print('_sre.py:line734, update context.has_matched variable')
return
skip = ctx.peek_code(1)
ctx.skip_code(2) # set op pointer to the set code
#print(ctx.peek_char(), ord(ctx.peek_char()),
# decorate(ord(ctx.peek_char())))
if not self.check_charset(ctx, decorate(ord(ctx.peek_char()))):
#print('_sre.py:line738, update context.has_matched variable')
ctx.has_matched = False
return
ctx.skip_code(skip - 1)
ctx.skip_char(1)
#print('end:general_op_in')
def op_in(self, ctx):
# match set member (or non_member)
# <IN> <skip> <set>
#self._log(ctx, "OP_IN")
self.general_op_in(ctx)
return True
def op_in_ignore(self, ctx):
# match set member (or non_member), disregarding case of current char
# <IN_IGNORE> <skip> <set>
#self._log(ctx, "OP_IN_IGNORE")
self.general_op_in(ctx, ctx.state.lower)
return True
def op_jump(self, ctx):
# jump forward
# <JUMP> <offset>
#self._log(ctx, "JUMP", ctx.peek_code(1))
ctx.skip_code(ctx.peek_code(1) + 1)
return True
# skip info
# <INFO> <skip>
op_info = op_jump
def op_mark(self, ctx):
# set mark
# <MARK> <gid>
#self._log(ctx, "OP_MARK", ctx.peek_code(1))
ctx.state.set_mark(ctx.peek_code(1), ctx.string_position)
ctx.skip_code(2)
return True
def op_branch(self, ctx):
# alternation
# <BRANCH> <0=skip> code <JUMP> ... <NULL>
#self._log(ctx, "BRANCH")
ctx.state.marks_push()
ctx.skip_code(1)
current_branch_length = ctx.peek_code(0)
while current_branch_length:
# The following tries to shortcut branches starting with a
# (unmatched) literal. _sre.c also shortcuts charsets here.
if not (ctx.peek_code(1) == OPCODES["literal"] and \
(ctx.at_end() or ctx.peek_code(2) != ord(ctx.peek_char()))):
ctx.state.string_position = ctx.string_position
child_context = ctx.push_new_context(1)
#print("_sre.py:803:op_branch")
yield False
if child_context.has_matched:
ctx.has_matched = True
yield True
ctx.state.marks_pop_keep()
ctx.skip_code(current_branch_length)
current_branch_length = ctx.peek_code(0)
ctx.state.marks_pop_discard()
ctx.has_matched = False
#print('_sre.py:line805, update context.has_matched variable')
yield True
def op_repeat_one(self, ctx):
# match repeated sequence (maximizing).
# this operator only works if the repeated item is exactly one character
# wide, and we're not already collecting backtracking points.
# <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
mincount = ctx.peek_code(2)
maxcount = ctx.peek_code(3)
#print("repeat one", mincount, maxcount)
#self._log(ctx, "REPEAT_ONE", mincount, maxcount)
if ctx.remaining_chars() < mincount:
ctx.has_matched = False
yield True
ctx.state.string_position = ctx.string_position
count = self.count_repetitions(ctx, maxcount)
ctx.skip_char(count)
if count < mincount:
ctx.has_matched = False
yield True
if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
# tail is empty. we're finished
ctx.state.string_position = ctx.string_position
ctx.has_matched = True
yield True
ctx.state.marks_push()
if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["literal"]:
# Special case: Tail starts with a literal. Skip positions where
# the rest of the pattern cannot possibly match.
char = ctx.peek_code(ctx.peek_code(1) + 2)
while True:
while count >= mincount and \
(ctx.at_end() or ord(ctx.peek_char()) != char):
ctx.skip_char(-1)
count -= 1
if count < mincount:
break
ctx.state.string_position = ctx.string_position
child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
#print("_sre.py:856:push_new_context")
yield False
if child_context.has_matched:
ctx.has_matched = True
yield True
ctx.skip_char(-1)
count -= 1
ctx.state.marks_pop_keep()
else:
# General case: backtracking
while count >= mincount:
ctx.state.string_position = ctx.string_position
child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
yield False
if child_context.has_matched:
ctx.has_matched = True
yield True
ctx.skip_char(-1)
count -= 1
ctx.state.marks_pop_keep()
ctx.state.marks_pop_discard()
ctx.has_matched = False
#ctx.has_matched = True # <== this should be True (so match object gets returned to program)
yield True
def op_min_repeat_one(self, ctx):
# match repeated sequence (minimizing)
# <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
mincount = ctx.peek_code(2)
maxcount = ctx.peek_code(3)
#self._log(ctx, "MIN_REPEAT_ONE", mincount, maxcount)
if ctx.remaining_chars() < mincount:
ctx.has_matched = False
yield True
ctx.state.string_position = ctx.string_position
if mincount == 0:
count = 0
else:
count = self.count_repetitions(ctx, mincount)
if count < mincount:
ctx.has_matched = False
#print('_sre.py:line891, update context.has_matched variable')
yield True
ctx.skip_char(count)
if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
# tail is empty. we're finished
ctx.state.string_position = ctx.string_position
ctx.has_matched = True
yield True
ctx.state.marks_push()
while maxcount == MAXREPEAT or count <= maxcount:
ctx.state.string_position = ctx.string_position
child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
#print('_sre.py:916:push new context')
yield False
if child_context.has_matched:
ctx.has_matched = True
yield True
ctx.state.string_position = ctx.string_position
if self.count_repetitions(ctx, 1) == 0:
break
ctx.skip_char(1)
count += 1
ctx.state.marks_pop_keep()
ctx.state.marks_pop_discard()
ctx.has_matched = False
yield True
def op_repeat(self, ctx):
# create repeat context. all the hard work is done by the UNTIL
# operator (MAX_UNTIL, MIN_UNTIL)
# <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail
#self._log(ctx, "REPEAT", ctx.peek_code(2), ctx.peek_code(3))
#if ctx.state.repeat is None:
# print("951:ctx.state.repeat is None")
# #ctx.state.repeat=_RepeatContext(ctx)
repeat = _RepeatContext(ctx)
ctx.state.repeat = repeat
ctx.state.string_position = ctx.string_position
child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
#print("_sre.py:941:push new context", id(child_context))
#print(child_context.state.repeat)
#print(ctx.state.repeat)
# are these two yields causing the issue?
yield False
ctx.state.repeat = repeat.previous
ctx.has_matched = child_context.has_matched
yield True
def op_max_until(self, ctx):
# maximizing repeat
# <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail
repeat = ctx.state.repeat
#print("op_max_until") #, id(ctx.state.repeat))
if repeat is None:
#print(id(ctx), id(ctx.state))
raise RuntimeError("Internal re error: MAX_UNTIL without REPEAT.")
mincount = repeat.peek_code(2)
maxcount = repeat.peek_code(3)
ctx.state.string_position = ctx.string_position
count = repeat.count + 1
#self._log(ctx, "MAX_UNTIL", count)
if count < mincount:
# not enough matches
repeat.count = count
child_context = repeat.push_new_context(4)
yield False
ctx.has_matched = child_context.has_matched
if not ctx.has_matched:
repeat.count = count - 1
ctx.state.string_position = ctx.string_position
yield True
if (count < maxcount or maxcount == MAXREPEAT) \
and ctx.state.string_position != repeat.last_position:
# we may have enough matches, if we can match another item, do so
repeat.count = count
ctx.state.marks_push()
save_last_position = repeat.last_position # zero-width match protection
repeat.last_position = ctx.state.string_position
child_context = repeat.push_new_context(4)
yield False
repeat.last_position = save_last_position
if child_context.has_matched:
ctx.state.marks_pop_discard()
ctx.has_matched = True
yield True
ctx.state.marks_pop()
repeat.count = count - 1
ctx.state.string_position = ctx.string_position
# cannot match more repeated items here. make sure the tail matches
ctx.state.repeat = repeat.previous
child_context = ctx.push_new_context(1)
#print("_sre.py:987:op_max_until")
yield False
ctx.has_matched = child_context.has_matched
if not ctx.has_matched:
ctx.state.repeat = repeat
ctx.state.string_position = ctx.string_position
yield True
def op_min_until(self, ctx):
# minimizing repeat
# <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail
repeat = ctx.state.repeat
if repeat is None:
raise RuntimeError("Internal re error: MIN_UNTIL without REPEAT.")
mincount = repeat.peek_code(2)
maxcount = repeat.peek_code(3)
ctx.state.string_position = ctx.string_position
count = repeat.count + 1
#self._log(ctx, "MIN_UNTIL", count)
if count < mincount:
# not enough matches
repeat.count = count
child_context = repeat.push_new_context(4)
yield False
ctx.has_matched = child_context.has_matched
if not ctx.has_matched:
repeat.count = count - 1
ctx.state.string_position = ctx.string_position
yield True
# see if the tail matches
ctx.state.marks_push()
ctx.state.repeat = repeat.previous
child_context = ctx.push_new_context(1)
#print('_sre.py:1022:push new context')
yield False
if child_context.has_matched:
ctx.has_matched = True
yield True
ctx.state.repeat = repeat
ctx.state.string_position = ctx.string_position
ctx.state.marks_pop()
# match more until tail matches
if count >= maxcount and maxcount != MAXREPEAT:
ctx.has_matched = False
#print('_sre.py:line1022, update context.has_matched variable')
yield True
repeat.count = count
child_context = repeat.push_new_context(4)
yield False
ctx.has_matched = child_context.has_matched
if not ctx.has_matched:
repeat.count = count - 1
ctx.state.string_position = ctx.string_position
yield True
def general_op_groupref(self, ctx, decorate=lambda x: x):
group_start, group_end = ctx.state.get_marks(ctx.peek_code(1))
if group_start is None or group_end is None or group_end < group_start:
ctx.has_matched = False
return True
while group_start < group_end:
if ctx.at_end() or decorate(ord(ctx.peek_char())) \
!= decorate(ord(ctx.state.string[group_start])):
ctx.has_matched = False
#print('_sre.py:line1042, update context.has_matched variable')
return True
group_start += 1
ctx.skip_char(1)
ctx.skip_code(2)
return True
def op_groupref(self, ctx):
# match backreference
# <GROUPREF> <zero-based group index>
#self._log(ctx, "GROUPREF", ctx.peek_code(1))
return self.general_op_groupref(ctx)
def op_groupref_ignore(self, ctx):
# match backreference case-insensitive
# <GROUPREF_IGNORE> <zero-based group index>
#self._log(ctx, "GROUPREF_IGNORE", ctx.peek_code(1))
return self.general_op_groupref(ctx, ctx.state.lower)
def op_groupref_exists(self, ctx):
# <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ...
#self._log(ctx, "GROUPREF_EXISTS", ctx.peek_code(1))
group_start, group_end = ctx.state.get_marks(ctx.peek_code(1))
if group_start is None or group_end is None or group_end < group_start:
ctx.skip_code(ctx.peek_code(2) + 1)
else:
ctx.skip_code(3)
return True
def op_assert(self, ctx):
# assert subpattern
# <ASSERT> <skip> <back> <pattern>
#self._log(ctx, "ASSERT", ctx.peek_code(2))
ctx.state.string_position = ctx.string_position - ctx.peek_code(2)
if ctx.state.string_position < 0:
ctx.has_matched = False
yield True
child_context = ctx.push_new_context(3)
yield False
if child_context.has_matched:
ctx.skip_code(ctx.peek_code(1) + 1)
else:
ctx.has_matched = False
yield True
def op_assert_not(self, ctx):
# assert not subpattern
# <ASSERT_NOT> <skip> <back> <pattern>
#self._log(ctx, "ASSERT_NOT", ctx.peek_code(2))
ctx.state.string_position = ctx.string_position - ctx.peek_code(2)
if ctx.state.string_position >= 0:
child_context = ctx.push_new_context(3)
yield False
if child_context.has_matched:
ctx.has_matched = False
yield True
ctx.skip_code(ctx.peek_code(1) + 1)
yield True
def unknown(self, ctx):
#self._log(ctx, "UNKNOWN", ctx.peek_code())
raise RuntimeError("Internal re error. Unknown opcode: %s" % ctx.peek_code())
def check_charset(self, ctx, char):
"""Checks whether a character matches set of arbitrary length. Assumes
the code pointer is at the first member of the set."""
self.set_dispatcher.reset(char)
save_position = ctx.code_position
result = None
while result is None:
result = self.set_dispatcher.dispatch(ctx.peek_code(), ctx)
ctx.code_position = save_position
#print("_sre.py:1123:check_charset", result)
return result
def count_repetitions(self, ctx, maxcount):
"""Returns the number of repetitions of a single item, starting from the
current string position. The code pointer is expected to point to a
REPEAT_ONE operation (with the repeated 4 ahead)."""
count = 0
real_maxcount = ctx.state.end - ctx.string_position
if maxcount < real_maxcount and maxcount != MAXREPEAT:
real_maxcount = maxcount
# XXX could special case every single character pattern here, as in C.
# This is a general solution, a bit hackisch, but works and should be
# efficient.
code_position = ctx.code_position
string_position = ctx.string_position
ctx.skip_code(4)
reset_position = ctx.code_position
while count < real_maxcount:
# this works because the single character pattern is followed by
# a success opcode
ctx.code_position = reset_position
self.dispatch(ctx.peek_code(), ctx)
#print("count_repetitions", ctx.has_matched, count)
if ctx.has_matched is False: # could be None as well
break
count += 1
ctx.has_matched = None
ctx.code_position = code_position
ctx.string_position = string_position
return count
def _log(self, context, opname, *args):
arg_string = ("%s " * len(args)) % args
_log("|%s|%s|%s %s" % (context.pattern_codes,
context.string_position, opname, arg_string))
_OpcodeDispatcher.build_dispatch_table(OPCODES, "op_")
class _CharsetDispatcher(_Dispatcher):
def __init__(self):
self.ch_dispatcher = _ChcodeDispatcher()
def reset(self, char):
self.char = char
self.ok = True
def set_failure(self, ctx):
return not self.ok
def set_literal(self, ctx):
# <LITERAL> <code>
if ctx.peek_code(1) == self.char:
return self.ok
else:
ctx.skip_code(2)
def set_category(self, ctx):
# <CATEGORY> <code>
if self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
return self.ok
else:
ctx.skip_code(2)
def set_charset(self, ctx):
# <CHARSET> <bitmap> (16 bits per code word)
char_code = self.char
ctx.skip_code(1) # point to beginning of bitmap
if CODESIZE == 2:
if char_code < 256 and ctx.peek_code(char_code >> 4) \
& (1 << (char_code & 15)):
return self.ok
ctx.skip_code(16) # skip bitmap
else:
if char_code < 256 and ctx.peek_code(char_code >> 5) \
& (1 << (char_code & 31)):
return self.ok
ctx.skip_code(8) # skip bitmap
def set_range(self, ctx):
# <RANGE> <lower> <upper>
if ctx.peek_code(1) <= self.char <= ctx.peek_code(2):
return self.ok
ctx.skip_code(3)
def set_negate(self, ctx):
self.ok = not self.ok
ctx.skip_code(1)
#fixme brython. array module doesn't exist
def set_bigcharset(self, ctx):
raise NotImplementationError("_sre.py: set_bigcharset, array not implemented")
# <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
char_code = self.char
count = ctx.peek_code(1)
ctx.skip_code(2)
if char_code < 65536:
block_index = char_code >> 8
# NB: there are CODESIZE block indices per bytecode
a = array.array("B")
a.fromstring(array.array(CODESIZE == 2 and "H" or "I",
[ctx.peek_code(block_index // CODESIZE)]).tostring())
block = a[block_index % CODESIZE]
ctx.skip_code(256 // CODESIZE) # skip block indices
block_value = ctx.peek_code(block * (32 // CODESIZE)
+ ((char_code & 255) >> (CODESIZE == 2 and 4 or 5)))
if block_value & (1 << (char_code & ((8 * CODESIZE) - 1))):
return self.ok
else:
ctx.skip_code(256 // CODESIZE) # skip block indices
ctx.skip_code(count * (32 // CODESIZE)) # skip blocks
def unknown(self, ctx):
return False
_CharsetDispatcher.build_dispatch_table(OPCODES, "set_")
class _AtcodeDispatcher(_Dispatcher):
def at_beginning(self, ctx):
return ctx.at_beginning()
at_beginning_string = at_beginning
def at_beginning_line(self, ctx):
return ctx.at_beginning() or _is_linebreak(ctx.peek_char(-1))
def at_end(self, ctx):
return (ctx.remaining_chars() == 1 and ctx.at_linebreak()) or ctx.at_end()
def at_end_line(self, ctx):
return ctx.at_linebreak() or ctx.at_end()
def at_end_string(self, ctx):
return ctx.at_end()
def at_boundary(self, ctx):
return ctx.at_boundary(_is_word)
def at_non_boundary(self, ctx):
return not ctx.at_boundary(_is_word)
def at_loc_boundary(self, ctx):
return ctx.at_boundary(_is_loc_word)
def at_loc_non_boundary(self, ctx):
return not ctx.at_boundary(_is_loc_word)
def at_uni_boundary(self, ctx):
return ctx.at_boundary(_is_uni_word)
def at_uni_non_boundary(self, ctx):
return not ctx.at_boundary(_is_uni_word)
def unknown(self, ctx):
return False
_AtcodeDispatcher.build_dispatch_table(ATCODES, "")
class _ChcodeDispatcher(_Dispatcher):
def category_digit(self, ctx):
return _is_digit(ctx.peek_char())
def category_not_digit(self, ctx):
return not _is_digit(ctx.peek_char())
def category_space(self, ctx):
return _is_space(ctx.peek_char())
def category_not_space(self, ctx):
return not _is_space(ctx.peek_char())
def category_word(self, ctx):
return _is_word(ctx.peek_char())
def category_not_word(self, ctx):
return not _is_word(ctx.peek_char())
def category_linebreak(self, ctx):
return _is_linebreak(ctx.peek_char())
def category_not_linebreak(self, ctx):
return not _is_linebreak(ctx.peek_char())
def category_loc_word(self, ctx):
return _is_loc_word(ctx.peek_char())
def category_loc_not_word(self, ctx):
return not _is_loc_word(ctx.peek_char())
def category_uni_digit(self, ctx):
return ctx.peek_char().isdigit()
def category_uni_not_digit(self, ctx):
return not ctx.peek_char().isdigit()
def category_uni_space(self, ctx):
return ctx.peek_char().isspace()
def category_uni_not_space(self, ctx):
return not ctx.peek_char().isspace()
def category_uni_word(self, ctx):
return _is_uni_word(ctx.peek_char())
def category_uni_not_word(self, ctx):
return not _is_uni_word(ctx.peek_char())
def category_uni_linebreak(self, ctx):
return ord(ctx.peek_char()) in _uni_linebreaks
def category_uni_not_linebreak(self, ctx):
return ord(ctx.peek_char()) not in _uni_linebreaks
def unknown(self, ctx):
return False
_ChcodeDispatcher.build_dispatch_table(CHCODES, "")
_ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 ]
def _is_digit(char):
code = ord(char)
return code < 128 and _ascii_char_info[code] & 1
def _is_space(char):
code = ord(char)
return code < 128 and _ascii_char_info[code] & 2
def _is_word(char):
# NB: non-ASCII chars aren't words according to _sre.c
code = ord(char)
return code < 128 and _ascii_char_info[code] & 16
def _is_loc_word(char):
return (not (ord(char) & ~255) and char.isalnum()) or char == '_'
def _is_uni_word(char):
# not valid in python 3
#return unichr(ord(char)).isalnum() or char == '_'
return chr(ord(char)).isalnum() or char == '_'
def _is_linebreak(char):
return char == "\n"
# Static list of all unicode codepoints reported by Py_UNICODE_ISLINEBREAK.
_uni_linebreaks = [10, 13, 28, 29, 30, 133, 8232, 8233]
def _log(message):
if 0:
print(message)