Show More
Commit Description:
Merge pull request #17 from nattee/master...
Commit Description:
Merge pull request #17 from nattee/master
upgrade to current working snapshot
References:
File last commit:
Show/Diff file:
Action:
lib/assets/Lib/xml/sax/expatreader.py
| 417 lines
| 14.7 KiB
| text/x-python
| PythonLexer
|
"""
SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import (
    feature_validation,
    feature_namespaces,
    feature_namespace_prefixes,
    feature_external_ges,
    feature_external_pes,
    feature_string_interning,
    property_xml_string,
    property_interning_dict,
)

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import below.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)

# An importable module that lacks ParserCreate is just as unusable as a
# missing one; report it through the same exception.
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)

from xml.sax import xmlreader, saxutils, handler

# Short module-level aliases used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
# Weak references let the locator point at the parser without creating a
# reference cycle between the parser and the content handler.  Start from
# an identity fallback and upgrade to a real weak proxy when the
# interpreter provides the _weakref module.
def _mkproxy(o):
    """Fallback used when weak references are unavailable: return *o* itself."""
    return o

try:
    import _weakref
except ImportError:
    pass
else:
    import weakref
    _mkproxy = weakref.proxy
    # Keep the module namespace clean; only _mkproxy is needed afterwards.
    del weakref, _weakref
# --- ExpatLocator | ||||
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.

    Once the parser has been garbage collected the getters fall back to
    the same values they report for a parser that is not currently
    parsing (None, or 1 for the line number) instead of letting the dead
    proxy's ReferenceError escape.
    """

    def __init__(self, parser):
        """Remember *parser* through _mkproxy (a weak proxy when available)."""
        self._ref = _mkproxy(parser)

    def _expat(self):
        """Return the parser's current pyexpat object, or None if unavailable."""
        ref = self._ref
        if ref is None:
            return None
        try:
            # A weak proxy is never None itself; a collected referent only
            # shows up as ReferenceError on attribute access.
            return ref._parser
        except ReferenceError:
            return None

    def _input_source(self):
        """Return the parser's current InputSource, or None if unavailable."""
        ref = self._ref
        if ref is None:
            return None
        try:
            return ref._source
        except ReferenceError:
            return None

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None when there is no parser."""
        expat_parser = self._expat()
        if expat_parser is None:
            return None
        return expat_parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 when there is no parser."""
        expat_parser = self._expat()
        if expat_parser is None:
            return 1
        return expat_parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the parser's source, or None if it is gone."""
        source = self._input_source()
        if source is None:
            return None
        return source.getPublicId()

    def getSystemId(self):
        """Return the system id of the parser's source, or None if it is gone."""
        source = self._input_source()
        if source is None:
            return None
        return source.getSystemId()
# --- ExpatParser | ||||
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    The object is both the incremental parser and its own Locator: the
    Locator methods read the position from the current pyexpat parser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle driver.

        namespaceHandling -- initial value of the SAX namespaces feature.
        bufsize -- forwarded to xmlreader.IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # Stack of (parser, source) pairs saved while external entities
        # are being parsed by a child expat parser.
        self._entity_stack = []
        # NOTE(review): external general entities are resolved by default.
        # That exposes callers parsing untrusted XML to external-entity
        # (XXE) fetches; CPython switched this default off in 3.7.1.  Left
        # unchanged here to keep existing callers working -- confirm
        # whether the default can be flipped.
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        """Parse an XML document from a URL or an InputSource.

        If anything goes wrong the byte stream of the prepared source is
        closed before the exception is re-raised, so a failed parse does
        not leak the stream.
        """
        source = saxutils.prepare_input_source(source)

        self._source = source
        try:
            self.reset()
            self._cont_handler.setDocumentLocator(ExpatLocator(self))
            xmlreader.IncrementalParser.parse(self, source)
        except BaseException:
            # Callers of parse() have no other hook to release the stream.
            self._close_source()
            raise

    def prepareParser(self, source):
        """Tell expat the base URI when the source has a system id."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install *handler*; rewire expat immediately if a parse is running."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of SAX feature *name*.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set SAX feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable a feature expat cannot provide, and
        SAXNotRecognizedException for unknown feature names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dict if one is already set.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of SAX property *name*.

        The XML-string property is only available while an expat parser
        exists; otherwise SAXNotSupportedException is raised.  Unknown
        names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set SAX property *name* to *value*.

        The lexical handler may be replaced mid-parse.  The XML-string
        property is read-only (SAXNotSupportedException); unknown names
        raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Hand *data* to expat, starting a new document if necessary.

        expat errors are wrapped in SAXParseException and passed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document and release the parser and the source stream.

        Does nothing while an external entity is being completed, or when
        there is no expat parser (never fed, or already closed) -- without
        that guard a repeated close() would start and fail a new, empty
        document.  The parsing flag is cleared and the byte stream closed
        even if the final feed or endDocument() raises.
        """
        if self._entity_stack or self._parser is None:
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            self._parsing = 0
            self._close_source()

    def _close_source(self):
        """Close the byte stream of the current input source, if it has one."""
        bs = self._source.getByteStream()
        if bs is not None:
            bs.close()

    def _reset_cont_handler(self):
        """Point expat's PI and character-data callbacks at the content handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Wire (or clear) expat's lexical callbacks from the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # startDTD needs its arguments reordered, hence the adapter.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and install all callbacks on it."""
        if self._namespaces:
            # " " is the separator expat puts between URI, local name and
            # prefix; the *_ns handlers split names on it.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None when there is no parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 when there is no parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public id of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system id of the current input source."""
        return self._source.getSystemId()

    # event handlers

    @staticmethod
    def _split_ns_name(name):
        """Turn an expat namespace-mode element name into a (uri, localname) pair."""
        parts = name.split()
        if len(parts) == 1:
            # no namespace
            return (None, name)
        if len(parts) == 3:
            # "uri localname prefix": the prefix is not part of the SAX name
            return parts[0], parts[1]
        # default namespace
        return tuple(parts)

    def start_element(self, name, attrs):
        """expat callback (non-namespace mode): forward to startElement()."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """expat callback (non-namespace mode): forward to endElement()."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat callback (namespace mode): forward to startElementNS().

        Attribute names are split the same way as element names, and a
        qualified name is rebuilt for each attribute.  The element's own
        qname is passed as None.
        """
        pair = self._split_ns_name(name)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """expat callback (namespace mode): forward to endElementNS()."""
        self._cont_handler.endElementNS(self._split_ns_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """expat callback: forward to startPrefixMapping()."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """expat callback: forward to endPrefixMapping()."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """expat callback: forward to startDTD() with pubid before sysid."""
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """expat callback: forward to the DTD handler's unparsedEntityDecl()."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """expat callback: forward to the DTD handler's notationDecl()."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback: resolve and parse an external general entity.

        Returns 1 on success (or when external entities are disabled) and
        0 on failure, which makes expat report an error to the outer
        Parse() call.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Parse the entity with a child parser; the outer parser and
        # source are kept on the stack so close() knows not to finish the
        # document.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            # Only ordinary errors are turned into an expat failure;
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            # NOTE(review): the saved parser/source are deliberately not
            # restored here, so the SAXParseException built by feed() for
            # the outer failure reads its position from the entity parser
            # -- confirm before changing.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """expat callback: forward to skippedEntity()."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
# --- | ||||
def create_parser(*args, **kwargs):
    """Build and return an ExpatParser, forwarding every argument unchanged."""
    parser = ExpatParser(*args, **kwargs)
    return parser
# --- | ||||
if __name__ == "__main__":
    # Manual smoke test: fetch a sample document over HTTP and echo it
    # back through XMLGenerator.  Requires network access.
    import xml.sax.saxutils

    p = create_parser()
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")