xmlschema_acue.etree

1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). 4 # All rights reserved. 5 # This file is distributed under the terms of the MIT License. 6 # See the file 'LICENSE' in the root directory of the present 7 # distribution, or http://opensource.org/licenses/MIT. 8 # 9 # @author Davide Brunato <brunato@sissa.it> 10 # 11 """ 12 This module contains ElementTree setup and helpers for xmlschema package. 13 """ 14 from __future__ import unicode_literals 15 import sys 16 import re 17 import importlib 18 from collections import Counter 19 20 try: 21 import lxml.etree as lxml_etree 22 except ImportError: 23 lxml_etree = None 24 25 from .compat import PY3 26 from .exceptions import XMLSchemaValueError, XMLSchemaTypeError 27 from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE 28 from .helpers import get_namespace, get_qname, qname_to_prefixed 29 from .xpath import ElementPathMixin 30 31 ### 32 # Programmatic import of xml.etree.ElementTree 33 # 34 # In Python 3 the pure python implementation is overwritten by the C module API, 35 # so use a programmatic re-import to obtain the pure Python module, necessary for 36 # defining a safer XMLParser. 
#
if not PY3:
    # Python 2.7: nothing has to be done because the pure Python module is
    # not overridden by the C implementation.
    ElementTree = PyElementTree = importlib.import_module('xml.etree.ElementTree')

elif '_elementtree' in sys.modules:
    # Temporarily remove the loaded modules
    ElementTree = sys.modules.pop('xml.etree.ElementTree', None)
    _cmod = sys.modules.pop('_elementtree')

    # Load the pure Python module: a None entry in sys.modules makes the
    # "import _elementtree" inside xml.etree.ElementTree fail.
    sys.modules['_elementtree'] = None
    PyElementTree = importlib.import_module('xml.etree.ElementTree')

    # Restore original modules
    sys.modules['_elementtree'] = _cmod
    if ElementTree is not None:
        sys.modules['xml.etree.ElementTree'] = ElementTree
    else:
        ElementTree = PyElementTree

else:
    # Load the pure Python module
    sys.modules['_elementtree'] = None
    PyElementTree = importlib.import_module('xml.etree.ElementTree')

    # Remove the pure Python module from imported modules
    del sys.modules['xml.etree.ElementTree']
    del sys.modules['_elementtree']

    # Load the C optimized ElementTree module
    ElementTree = importlib.import_module('xml.etree.ElementTree')


# ElementTree APIs
etree_element = ElementTree.Element
etree_register_namespace = ElementTree.register_namespace
ParseError = ElementTree.ParseError

etree_register_namespace('xslt', XSLT_NAMESPACE)
etree_register_namespace('hfp', HFP_NAMESPACE)
etree_register_namespace('vc', VC_NAMESPACE)


# Pure Python ElementTree APIs
#
# The pure Python module is a distinct module object with its own namespace
# map, so the prefixes must be registered through *its* register_namespace()
# (binding ElementTree.register_namespace here would register them twice on
# the other module and never on this one).
py_etree_element = PyElementTree.Element
py_etree_register_namespace = PyElementTree.register_namespace

py_etree_register_namespace('xslt', XSLT_NAMESPACE)
py_etree_register_namespace('hfp', HFP_NAMESPACE)
py_etree_register_namespace('vc', VC_NAMESPACE)


# Lxml APIs (all set to None when the lxml library is not installed)
if lxml_etree is not None:
    lxml_etree_element = lxml_etree.Element
    lxml_etree_comment = lxml_etree.Comment
    lxml_etree_register_namespace = lxml_etree.register_namespace

    lxml_etree_register_namespace('xslt', XSLT_NAMESPACE)
    lxml_etree_register_namespace('hfp', HFP_NAMESPACE)
    lxml_etree_register_namespace('vc', VC_NAMESPACE)
else:
    lxml_etree_element = None
    lxml_etree_comment = None
    lxml_etree_register_namespace = None

class SafeXMLParser(PyElementTree.XMLParser):
    """
    A pure Python XMLParser that refuses to process entities: the expat handlers
    for entity declarations and external entity references are replaced by methods
    that raise a ParseError. The deprecated *html* argument is not accepted.

    :param target: the target object called by the `feed()` method of the parser, \
    that defaults to `TreeBuilder`.
    :param encoding: if provided, its value overrides the encoding specified in the XML file.
    """
    def __init__(self, target=None, encoding=None):
        super(SafeXMLParser, self).__init__(target=target, encoding=encoding)

        # The underlying expat parser is exposed with a different attribute name
        # depending on the major Python version.
        if PY3:
            expat_parser = self.parser
        else:
            expat_parser = self._parser

        expat_parser.EntityDeclHandler = self.entity_declaration
        expat_parser.UnparsedEntityDeclHandler = self.unparsed_entity_declaration
        expat_parser.ExternalEntityRefHandler = self.external_entity_reference

    def entity_declaration(self, entity_name, is_parameter_entity, value, base, system_id, public_id, notation_name):
        """Handler for entity declarations: always raises a ParseError."""
        message = "Entities are forbidden (entity_name={!r})".format(entity_name)
        raise PyElementTree.ParseError(message)

    def unparsed_entity_declaration(self, entity_name, base, system_id, public_id, notation_name):
        """Handler for unparsed entity declarations: always raises a ParseError."""
        message = "Entities are forbidden (entity_name={!r})".format(entity_name)
        raise PyElementTree.ParseError(message)

    def external_entity_reference(self, context, base, system_id, public_id):
        """Handler for external entity references: always raises a ParseError."""
        message = "External references are forbidden (system_id={!r}, public_id={!r})".format(
            system_id, public_id
        )
        raise PyElementTree.ParseError(message)

131 132

def is_etree_element(elem):
    """
    Duck-typing test for ElementTree elements: the object must expose both the
    `tag` and the `attrib` attributes and must not be an ElementPathMixin instance.
    """
    if not hasattr(elem, 'tag'):
        return False
    if not hasattr(elem, 'attrib'):
        return False
    return not isinstance(elem, ElementPathMixin)

136 137

def etree_tostring(elem, namespaces=None, indent='', max_lines=None, spaces_for_tab=4, xml_declaration=False):
    """
    Serialize an Element tree to a string. Tab characters are replaced by whitespaces.

    :param elem: the Element instance.
    :param namespaces: is an optional mapping from namespace prefix to URI. Provided namespaces are \
    registered before serialization.
    :param indent: the base line indentation.
    :param max_lines: if provided truncates the serialization after a number of lines \
    (default: do not truncate).
    :param spaces_for_tab: number of spaces for replacing tab characters (default is 4).
    :param xml_declaration: if set to `True` inserts the XML declaration at the head.
    :return: a Unicode string.
    :raise: XMLSchemaTypeError if *elem* is not an ElementTree element and the lxml \
    library is not available.
    """
    def leading_spaces(line):
        # The run of space characters that a line starts with.
        return line[:len(line) - len(line.lstrip(' '))]

    def reindent(line):
        if not line:
            return line
        elif line.startswith(min_indent):
            # Shift the line left (start >= 0) or right (start < 0) so that the
            # minimum indentation found becomes the requested base indentation.
            return line[start:] if start >= 0 else indent[start:] + line
        else:
            return indent + line

    if isinstance(elem, etree_element):
        if namespaces:
            for prefix, uri in namespaces.items():
                etree_register_namespace(prefix, uri)
        tostring = ElementTree.tostring

    elif isinstance(elem, py_etree_element):
        if namespaces:
            for prefix, uri in namespaces.items():
                PyElementTree.register_namespace(prefix, uri)
        tostring = PyElementTree.tostring

    elif lxml_etree is not None:
        if namespaces:
            for prefix, uri in namespaces.items():
                if prefix:
                    lxml_etree_register_namespace(prefix, uri)
        tostring = lxml_etree.tostring
    else:
        raise XMLSchemaTypeError("cannot serialize %r: lxml library not available." % type(elem))

    if PY3:
        xml_text = tostring(elem, encoding="unicode").replace('\t', ' ' * spaces_for_tab)
    else:
        xml_text = unicode(tostring(elem)).replace('\t', ' ' * spaces_for_tab)  # @UndefinedVariable

    lines = ['<?xml version="1.0" encoding="UTF-8"?>'] if xml_declaration else []
    lines.extend(xml_text.splitlines())
    while lines and not lines[-1].strip():
        lines.pop(-1)
    if not lines:
        # Nothing to indent: avoid indexing an empty list below.
        return ''

    last_indent = leading_spaces(lines[-1])

    # Indentation of the lines between the first and the last one. Empty and
    # space-only lines carry no indentation information and are ignored; if no
    # usable line remains, fall back to the indentation of the last line
    # instead of calling min() on an empty sequence.
    inner_indents = [leading_spaces(line) for line in lines[1:-1] if line.strip(' ')]
    child_indent = min(inner_indents, key=len) if inner_indents else last_indent
    min_indent = min(child_indent, last_indent, key=len)

    start = len(min_indent) - len(indent)

    if max_lines is not None and len(lines) > max_lines + 2:
        lines = lines[:max_lines] + [child_indent + '...'] * 2 + lines[-1:]

    return '\n'.join(reindent(line) for line in lines)

def etree_iterpath(elem, tag=None, path='.', namespaces=None, add_position=False):
    """
    Creates an iterator for the element and its subelements that yields elements and paths.
    If tag is not `None` or '*', only elements whose tag matches are returned from the iterator.

    :param elem: the element to iterate.
    :param tag: tag filtering.
    :param path: the current path, '.' for default.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param add_position: add context position to child elements that appear multiple times. \
    The option is applied at every depth of the tree.
    """
    if tag == "*":
        tag = None
    if tag is None or elem.tag == tag:
        yield elem, path

    if add_position:
        children_tags = Counter(e.tag for e in elem)
        # Next 1-based position for each tag shared by more than one child.
        positions = {t: 1 for t, count in children_tags.items() if count > 1}
    else:
        positions = {}

    for child in elem:
        if callable(child.tag):
            continue  # Skip lxml comments

        child_name = child.tag if namespaces is None else qname_to_prefixed(child.tag, namespaces)
        if path == '/':
            child_path = '/%s' % child_name
        elif path:
            child_path = '/'.join((path, child_name))
        else:
            child_path = child_name

        if child.tag in positions:
            child_path += '[%d]' % positions[child.tag]
            positions[child.tag] += 1

        # Propagate add_position, otherwise only the first level of children
        # would get positional predicates.
        for item in etree_iterpath(child, tag, child_path, namespaces, add_position):
            yield item

245 246

def etree_getpath(elem, root, namespaces=None, relative=True, add_position=False):
    """
    Builds the XPath path that leads from *root* to its descendant *elem*.

    :param elem: the descendant element.
    :param root: the root element.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param relative: returns a relative path.
    :param add_position: add context position to child elements that appear multiple times.
    :return: An XPath expression or `None` if *elem* is not a descendant of *root*.
    """
    if relative:
        start_path = '.'
    else:
        root_name = qname_to_prefixed(root.tag, namespaces) if namespaces else root.tag
        start_path = '/%s' % root_name

    # Only elements with the same tag of *elem* are candidates; the match is by identity.
    candidates = etree_iterpath(root, elem.tag, start_path, namespaces, add_position)
    return next((found_path for e, found_path in candidates if e is elem), None)

268 269

def etree_last_child(elem):
    """
    Returns the last child of the element whose tag is not a callable (lxml comments
    have a callable tag), or `None` if there is no such child.
    """
    return next((node for node in reversed(elem) if not callable(node.tag)), None)

275 276

def etree_child_index(elem, child):
    """
    Return the index of *child* among the children of *elem*. The match is by
    identity. Raises XMLSchemaValueError if *child* is not a child of *elem*.
    """
    for position, candidate in enumerate(elem):
        if candidate is child:
            return position
    raise XMLSchemaValueError("%r is not a child of %r" % (child, elem))

283 284

def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True):
    """
    Tests the equality of two XML Element trees.

    Differences are reported by raising AssertionError explicitly, so the checks
    are still performed when Python runs with assertions disabled (``-O``).

    :param elem: the master Element tree, reference for namespace mapping.
    :param other: the other Element tree that has to be compared.
    :param strict: asserts strictly equality. `True` for default. If `False` \
    tags are compared using the last namespace found in the master tree as default, \
    attribute values and texts are compared after stripping (numeric values by value, \
    texts ignoring whitespaces) and a missing text/tail matches a blank one.
    :param skip_comments: skip lxml comments of both trees in the comparison. `True` for default.
    :raise: an AssertionError containing information about first difference encountered.
    """
    _REGEX_SPACES = re.compile(r'\s+')

    def is_skipped(e):
        return skip_comments and e.tag is lxml_etree_comment

    def same_number(s1, s2):
        # True if both strings represent the same float value.
        try:
            return float(s1) == float(s2)
        except (ValueError, TypeError):
            return False

    # Comments are filtered out of both trees, so that a comment present in only
    # one of them does not shift the pairing of the elements.
    other_elements = (e for e in other.iter() if not is_skipped(e))
    namespace = ''
    for e1 in elem.iter():
        if is_skipped(e1):
            continue

        try:
            e2 = next(other_elements)
        except StopIteration:
            raise AssertionError("Second tree ends before the first: %r." % e1)

        if strict or e1 is elem:
            if e1.tag != e2.tag:
                raise AssertionError("%r != %r: tags differ." % (e1, e2))
        else:
            namespace = get_namespace(e1.tag) or namespace
            if get_qname(namespace, e1.tag) != get_qname(namespace, e2.tag):
                raise AssertionError("%r != %r: tags differ." % (e1, e2))

        # Attributes
        if e1.attrib != e2.attrib:
            if strict:
                raise AssertionError("%r != %r: attribute differ: %r != %r." % (e1, e2, e1.attrib, e2.attrib))
            # Compare the keys as sets: attribute order is not significant.
            if set(e1.attrib.keys()) != set(e2.attrib.keys()):
                raise AssertionError(
                    "%r != %r: attribute keys differ: %r != %r." % (e1, e2, e1.attrib.keys(), e2.attrib.keys())
                )
            for k in e1.attrib:
                a1, a2 = e1.attrib[k].strip(), e2.attrib[k].strip()
                if a1 != a2 and not same_number(a1, a2):
                    raise AssertionError(
                        "%r != %r: attribute %r differ: %r != %r." % (e1, e2, k, a1, a2)
                    )

        # Number of children
        if skip_comments:
            nc1 = len([c for c in e1 if c.tag is not lxml_etree_comment])
            nc2 = len([c for c in e2 if c.tag is not lxml_etree_comment])
        else:
            nc1 = len(e1)
            nc2 = len(e2)
        if nc1 != nc2:
            raise AssertionError("%r != %r: children number differ: %r != %r." % (e1, e2, nc1, nc2))

        # Text
        if e1.text != e2.text:
            message = "%r != %r: texts differ: %r != %r." % (e1, e2, e1.text, e2.text)
            if strict:
                raise AssertionError(message)
            elif e1.text is None:
                if e2.text.strip():
                    raise AssertionError(message)
            elif e2.text is None:
                if e1.text.strip():
                    raise AssertionError(message)
            else:
                t1, t2 = e1.text.strip(), e2.text.strip()
                # Pattern.sub() takes the replacement first and the subject string second.
                if _REGEX_SPACES.sub('', t1) != _REGEX_SPACES.sub('', t2) and not same_number(t1, t2):
                    raise AssertionError(message)

        # Tail
        if e1.tail != e2.tail:
            message = "%r != %r: tails differ: %r != %r." % (e1, e2, e1.tail, e2.tail)
            if strict:
                raise AssertionError(message)
            elif e1.tail is None:
                if e2.tail.strip():
                    raise AssertionError(message)
            elif e2.tail is None:
                if e1.tail.strip():
                    raise AssertionError(message)
            elif e1.tail.strip() != e2.tail.strip():
                raise AssertionError(message)

    extra = next(other_elements, None)
    if extra is not None:
        raise AssertionError("First tree ends before the second: %r." % extra)

373

Copyright(C) 2019 Arno-Can Uestuensoez @Ingenieurbuero Arno-Can Uestuensoez	https://arnocan.wordpress.com
Generated by Epydoc 4.0.4 / Python-3.8 / fedora27 on Fri Dec 13 15:25:36 2019	http://epydoc.sourceforge.net

Source Code for Module xmlschema_acue.etree