Package xmlschema_acue ::
Module etree
1
2
3
4
5
6
7
8
9
10
11 """
12 This module contains ElementTree setup and helpers for xmlschema package.
13 """
14 from __future__ import unicode_literals
15 import sys
16 import re
17 import importlib
18 from collections import Counter
19
20 try:
21 import lxml.etree as lxml_etree
22 except ImportError:
23 lxml_etree = None
24
25 from .compat import PY3
26 from .exceptions import XMLSchemaValueError, XMLSchemaTypeError
27 from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE
28 from .helpers import get_namespace, get_qname, qname_to_prefixed
29 from .xpath import ElementPathMixin
30
31
32
33
34
35
36
37
# Bind two module objects for xml.etree.ElementTree:
#   ElementTree   - the module as it is normally imported by the interpreter;
#   PyElementTree - a copy imported while the `_elementtree` module is blocked
#                   (presumably so that it falls back to the pure-Python
#                   classes -- confirm against the stdlib module).
if not PY3:
    # Not Python 3: both names refer to the same imported module.
    ElementTree = PyElementTree = importlib.import_module('xml.etree.ElementTree')

elif '_elementtree' in sys.modules:
    # `_elementtree` is already loaded: take the cached modules out of
    # sys.modules so that the next import executes xml.etree.ElementTree again.
    ElementTree = sys.modules.pop('xml.etree.ElementTree', None)
    _cmod = sys.modules.pop('_elementtree')

    # A None entry in sys.modules makes `import _elementtree` fail with
    # ImportError while the second copy of the module is being imported.
    sys.modules['_elementtree'] = None
    PyElementTree = importlib.import_module('xml.etree.ElementTree')

    # Put back the entries that were removed above. If ElementTree was not
    # cached before, the freshly imported copy is used for both names.
    sys.modules['_elementtree'] = _cmod
    if ElementTree is not None:
        sys.modules['xml.etree.ElementTree'] = ElementTree
    else:
        ElementTree = PyElementTree

else:
    # `_elementtree` has not been loaded yet: block it, then import the copy
    # that is kept as PyElementTree.
    sys.modules['_elementtree'] = None
    PyElementTree = importlib.import_module('xml.etree.ElementTree')

    # Drop both cache entries so that the import below runs from scratch,
    # without the block on `_elementtree`.
    del sys.modules['xml.etree.ElementTree']
    del sys.modules['_elementtree']

    # Regular import, bound to ElementTree and left in sys.modules.
    ElementTree = importlib.import_module('xml.etree.ElementTree')
69
70
71
# Aliases of the ElementTree API for internal usage.
ParseError = ElementTree.ParseError
etree_register_namespace = ElementTree.register_namespace
etree_element = ElementTree.Element

# Register the prefixes of the extra namespaces used by the package.
for _prefix, _uri in (('xslt', XSLT_NAMESPACE), ('hfp', HFP_NAMESPACE), ('vc', VC_NAMESPACE)):
    etree_register_namespace(_prefix, _uri)
del _prefix, _uri  # do not leave the loop variables in the module namespace
79
80
81
# Aliases of the PyElementTree API (the copy of xml.etree.ElementTree bound to
# the name PyElementTree) for internal usage.
py_etree_element = PyElementTree.Element
# The alias must come from PyElementTree: each module object keeps its own
# prefix registry, so registering through ElementTree would leave the
# PyElementTree registry without these prefixes.
py_etree_register_namespace = PyElementTree.register_namespace

py_etree_register_namespace('xslt', XSLT_NAMESPACE)
py_etree_register_namespace('hfp', HFP_NAMESPACE)
py_etree_register_namespace('vc', VC_NAMESPACE)
88
89
90
# Aliases of the lxml.etree API; every alias is None when lxml is not installed.
if lxml_etree is None:
    lxml_etree_element = lxml_etree_comment = lxml_etree_register_namespace = None
else:
    lxml_etree_register_namespace = lxml_etree.register_namespace
    lxml_etree_comment = lxml_etree.Comment
    lxml_etree_element = lxml_etree.Element

    # Same extra prefixes that are registered for the ElementTree modules.
    lxml_etree_register_namespace('xslt', XSLT_NAMESPACE)
    lxml_etree_register_namespace('hfp', HFP_NAMESPACE)
    lxml_etree_register_namespace('vc', VC_NAMESPACE)
103
104
106 """
107 An XMLParser that forbids entities processing. Drops the *html* argument that is deprecated
108 since version 3.4.
109
110 :param target: the target object called by the `feed()` method of the parser, \
111 that defaults to `TreeBuilder`.
112 :param encoding: if provided, its value overrides the encoding specified in the XML file.
113 """
114 - def __init__(self, target=None, encoding=None):
120
121 - def entity_declaration(self, entity_name, is_parameter_entity, value, base, system_id, public_id, notation_name):
123
126
128 raise PyElementTree.ParseError(
129 "External references are forbidden (system_id={!r}, public_id={!r})".format(system_id, public_id)
130 )
131
132
# NOTE(review): the `def` line of this function is missing from the listing;
# the header below is reconstructed from the body -- confirm name and signature.
def is_etree_element(elem):
    """
    A safer test for matching ElementTree elements.

    :param elem: the object to test.
    :return: `True` if the object has both a `tag` and an `attrib` attribute and \
    is not an instance of `ElementPathMixin`, `False` otherwise.
    """
    has_element_api = hasattr(elem, 'tag') and hasattr(elem, 'attrib')
    return has_element_api and not isinstance(elem, ElementPathMixin)
136
137
def etree_tostring(elem, namespaces=None, indent='', max_lines=None, spaces_for_tab=4, xml_declaration=False):
    """
    Serialize an Element tree to a string. Tab characters are replaced by whitespaces.

    The serializer is chosen from the type of *elem*: the `ElementTree` module, the
    `PyElementTree` module, or lxml as fallback when it is available.

    :param elem: the Element instance.
    :param namespaces: is an optional mapping from namespace prefix to URI. Provided namespaces are \
    registered (in the registry of the selected library) before serialization. Empty prefixes are \
    not registered when lxml is used.
    :param indent: the base line indentation.
    :param max_lines: truncate the serialization after a number of lines (default: do not truncate).
    :param spaces_for_tab: number of spaces for replacing tab characters (default is 4).
    :param xml_declaration: if set to `True` inserts the XML declaration at the head.
    :return: a Unicode string.
    :raise: XMLSchemaTypeError if *elem* is not an ElementTree element and lxml is not available.
    """
    def leading_spaces(text):
        # Number of space characters that precede the first non-space character.
        return len(text) - len(text.lstrip(' '))

    def reindent(line):
        # `min_indent` and `start` are bound below, before the first call.
        if not line:
            return line
        if not line.startswith(min_indent):
            return indent + line
        if start >= 0:
            return line[start:]
        return indent[start:] + line

    # Pick the registration function and the serializer that match the element.
    if isinstance(elem, etree_element):
        register, tostring, skip_empty_prefix = etree_register_namespace, ElementTree.tostring, False
    elif isinstance(elem, py_etree_element):
        register, tostring, skip_empty_prefix = PyElementTree.register_namespace, PyElementTree.tostring, False
    elif lxml_etree is not None:
        register, tostring, skip_empty_prefix = lxml_etree_register_namespace, lxml_etree.tostring, True
    else:
        raise XMLSchemaTypeError("cannot serialize %r: lxml library not available." % type(elem))

    for prefix, uri in (namespaces or {}).items():
        if prefix or not skip_empty_prefix:
            register(prefix, uri)

    if PY3:
        raw_text = tostring(elem, encoding="unicode")
    else:
        raw_text = unicode(tostring(elem))
    xml_text = raw_text.replace('\t', ' ' * spaces_for_tab)

    lines = []
    if xml_declaration:
        lines.append('<?xml version="1.0" encoding="UTF-8"?>')
    lines.extend(xml_text.splitlines())
    while lines and not lines[-1].strip():
        del lines[-1]

    # Indentation of the closing line and the smallest indentation among the
    # lines between the first and the last one.
    last_indent = ' ' * leading_spaces(lines[-1])
    if len(lines) > 2:
        child_indent = ' ' * min(leading_spaces(line) for line in lines[1:-1] if line.strip(' '))
        min_indent = min(child_indent, last_indent)
    else:
        min_indent = child_indent = last_indent

    # Positive: characters to cut from each line; negative: characters of
    # `indent` to put in front of each line.
    start = len(min_indent) - len(indent)

    if max_lines is not None and len(lines) > max_lines + 2:
        lines = lines[:max_lines] + [child_indent + '...'] * 2 + lines[-1:]

    return '\n'.join(reindent(line) for line in lines)
203
204
def etree_iterpath(elem, tag=None, path='.', namespaces=None, add_position=False):
    """
    Creates an iterator for the element and its subelements that yields elements and paths.
    The element itself is yielded before its descendants. If tag is not `None` or '*', only
    elements whose tag matches are returned from the iterator.

    :param elem: the element to iterate.
    :param tag: tag filtering.
    :param path: the current path, '.' for default.
    :param namespaces: is an optional mapping from namespace prefix to URI. When provided \
    the tags in the paths are converted to prefixed names.
    :param add_position: add context position to child elements that appear multiple times. \
    The option is applied at every level of the tree.
    :return: an iterator of `(element, path)` couples.
    """
    if tag == "*":
        tag = None
    if tag is None or elem.tag == tag:
        yield elem, path

    if add_position:
        # Only tags shared by two or more children get a positional predicate.
        # The mapping holds the next 1-based position to assign for each tag.
        children_tags = Counter(e.tag for e in elem)
        positions = {t: 1 for t, count in children_tags.items() if count > 1}
    else:
        positions = ()

    for child in elem:
        if callable(child.tag):
            continue  # nodes with a callable tag (e.g. comments) have no path step

        child_name = child.tag if namespaces is None else qname_to_prefixed(child.tag, namespaces)
        if path == '/':
            child_path = '/%s' % child_name
        elif path:
            child_path = '/'.join((path, child_name))
        else:
            child_path = child_name

        if child.tag in positions:
            child_path += '[%d]' % positions[child.tag]
            positions[child.tag] += 1

        # Forward add_position, otherwise positions are lost below the first level.
        for _child, _child_path in etree_iterpath(child, tag, child_path, namespaces, add_position):
            yield _child, _child_path
245
246
def etree_getpath(elem, root, namespaces=None, relative=True, add_position=False):
    """
    Returns the XPath path from *root* to descendant *elem* element.

    :param elem: the descendant element.
    :param root: the root element.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param relative: returns a relative path, that starts with '.'. If `False` the path \
    starts with '/' followed by the name of the root element.
    :param add_position: add context position to child elements that appear multiple times.
    :return: An XPath expression or `None` if *elem* is not a descendant of *root*.
    """
    if relative:
        base_path = '.'
    else:
        root_name = qname_to_prefixed(root.tag, namespaces) if namespaces else root.tag
        base_path = '/%s' % root_name

    # Only elements with the same tag are visited; the match is done by identity.
    candidates = etree_iterpath(root, elem.tag, base_path, namespaces, add_position)
    return next((found_path for e, found_path in candidates if e is elem), None)
268
269
# NOTE(review): the `def` line of this function is missing from the listing;
# the header below is reconstructed from the body -- confirm name and signature.
def etree_last_child(elem):
    """
    Returns the last child of the element, ignoring children that are lxml comments.

    :param elem: the parent element.
    :return: the last child whose tag is not a callable, or `None` if there is no such child.
    """
    children = (child for child in reversed(elem) if not callable(child.tag))
    return next(children, None)
275
276
# NOTE(review): the `def` line of this function is missing from the listing;
# the header below is reconstructed from the body and the docstring -- confirm
# name and parameter order.
def etree_child_index(elem, child):
    """
    Return the index or raise ValueError if it is not a *child* of *elem*.

    :param elem: the parent element.
    :param child: the element to look for; it is matched by identity, not by equality.
    :return: the position of *child* among the children of *elem*.
    :raise: XMLSchemaValueError if *child* is not found.
    """
    for index, candidate in enumerate(elem):
        if candidate is child:
            return index
    raise XMLSchemaValueError("%r is not a child of %r" % (child, elem))
283
284
# NOTE(review): the `def` line of this function is missing from the listing;
# the header below is reconstructed from the body and the docstring. The
# default of `skip_comments` is an assumption -- confirm name and signature.
def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True):
    """
    Tests the equality of two XML Element trees.

    The two trees are walked in parallel and the first difference found in tags,
    attributes, number of children, texts or tails is reported.

    :param elem: the master Element tree, reference for namespace mapping.
    :param other: the other Element tree that has to be compared.
    :param strict: asserts strictly equality. `True` for default. If `False` tags are \
    compared after the namespace mapping, surrounding whitespace of attribute values, texts \
    and tails is ignored and values that differ as strings are accepted when they are \
    equal as floats.
    :param skip_comments: skip the lxml comments met while iterating *elem* and do not \
    count comments as children.
    :raise: an AssertionError containing information about first difference encountered.
    """
    _REGEX_SPACES = re.compile(r'\s+')

    def same_float(value1, value2):
        # True only if both strings can be converted to floats that are equal.
        try:
            return float(value1) == float(value2)
        except (ValueError, TypeError):
            return False

    # Explicit raises are used in place of assert statements, so that the checks
    # are not removed when Python runs with optimizations (-O).
    other_elements = iter(other.iter())
    namespace = ''
    for e1 in elem.iter():
        if skip_comments and e1.tag is lxml_etree_comment:
            continue

        try:
            e2 = next(other_elements)
        except StopIteration:
            raise AssertionError("Second tree ends before the first: %r." % e1)

        # Tags
        if strict or e1 is elem:
            if e1.tag != e2.tag:
                raise AssertionError("%r != %r: tags differ." % (e1, e2))
        else:
            # The last namespace found in the master tree is used for
            # the unqualified tags of both trees.
            namespace = get_namespace(e1.tag) or namespace
            if get_qname(namespace, e1.tag) != get_qname(namespace, e2.tag):
                raise AssertionError("%r != %r: tags differ." % (e1, e2))

        # Attributes
        if e1.attrib != e2.attrib:
            if strict:
                raise AssertionError("%r != %r: attribute differ: %r != %r." % (e1, e2, e1.attrib, e2.attrib))
            if not (e1.attrib.keys() == e2.attrib.keys()):
                raise AssertionError(
                    "%r != %r: attribute keys differ: %r != %r." % (e1, e2, e1.attrib.keys(), e2.attrib.keys())
                )
            for k in e1.attrib:
                a1, a2 = e1.attrib[k].strip(), e2.attrib[k].strip()
                if a1 != a2 and not same_float(a1, a2):
                    raise AssertionError(
                        "%r != %r: attribute %r differ: %r != %r." % (e1, e2, k, a1, a2)
                    )

        # Number of children
        if skip_comments:
            nc1 = len([c for c in e1 if c.tag is not lxml_etree_comment])
            nc2 = len([c for c in e2 if c.tag is not lxml_etree_comment])
        else:
            nc1 = len(e1)
            nc2 = len(e2)
        if nc1 != nc2:
            raise AssertionError("%r != %r: children number differ: %r != %r." % (e1, e2, nc1, nc2))

        # Text
        if e1.text != e2.text:
            message = "%r != %r: texts differ: %r != %r." % (e1, e2, e1.text, e2.text)
            if strict:
                raise AssertionError(message)
            elif e1.text is None:
                if e2.text.strip():
                    raise AssertionError(message)
            elif e2.text is None:
                if e1.text.strip():
                    raise AssertionError(message)
            else:
                # Pattern.sub() takes the replacement first and the string
                # second: whitespace is removed before comparing.
                text1 = _REGEX_SPACES.sub('', e1.text.strip())
                text2 = _REGEX_SPACES.sub('', e2.text.strip())
                if text1 != text2 and not same_float(e1.text.strip(), e2.text.strip()):
                    raise AssertionError(message)

        # Tail
        if e1.tail != e2.tail:
            message = "%r != %r: tails differ: %r != %r." % (e1, e2, e1.tail, e2.tail)
            if strict:
                raise AssertionError(message)
            elif e1.tail is None:
                if e2.tail.strip():
                    raise AssertionError(message)
            elif e2.tail is None:
                if e1.tail.strip():
                    raise AssertionError(message)
            elif e1.tail.strip() != e2.tail.strip():
                raise AssertionError(message)

    # The second tree must be exhausted too.
    try:
        e2 = next(other_elements)
    except StopIteration:
        pass
    else:
        raise AssertionError("First tree ends before the second: %r." % e2)
373