Package xmlschema_acue ::
Module converters
1
2
3
4
5
6
7
8
9
10 """
11 This module contains converter classes and definitions.
12 """
13 from __future__ import unicode_literals
14 from collections import namedtuple, OrderedDict
15 import string
16
17 from xmlschema_acue.compat import ordered_dict_class, unicode_type
18 from xmlschema_acue.exceptions import XMLSchemaValueError
19 from xmlschema_acue.etree import etree_element, lxml_etree_element, etree_register_namespace, lxml_etree_register_namespace
20 from xmlschema_acue.namespaces import XSI_NAMESPACE
21 from xmlschema_acue.namespaces import NamespaceMapper
22
# Namedtuple for Element data interchange between decoders and converters.
# Fields, in order: tag, text, content, attributes.
ElementData = namedtuple('ElementData', 'tag text content attributes')
28 if isinstance(value, bool):
29 return 'true' if value else 'false'
30 elif isinstance(value, (list, tuple)):
31 return ' '.join(unicode_type(e) for e in value)
32 else:
33 return unicode_type(value)
34
37 """
38 Generic XML Schema based converter class. A converter is used to compose
39 decoded XML data for an Element into a data structure and to build an Element
40 from encoded data structure.
41
42 :param namespaces: map from namespace prefixes to URI.
43 :param dict_class: dictionary class to use for decoded data. Default is `dict`.
44 :param list_class: list class to use for decoded data. Default is `list`.
45 :param text_key: is the key to apply to element's decoded text data.
46 :param attr_prefix: controls the mapping of XML attributes, to the same name or \
47 with a prefix. If `None` the converter ignores attributes.
48 :param cdata_prefix: is used for including and prefixing the CDATA parts of a \
49 mixed content, that are labeled with an integer instead of a string. \
50 CDATA parts are ignored if this argument is `None`.
51 :param etree_element_class: the class that has to be used to create new XML elements, \
52 if not provided uses the ElementTree's Element class.
53 :param indent: number of spaces for XML indentation (default is 4).
54
55 :ivar dict: dictionary class to use for decoded data.
56 :ivar list: list class to use for decoded data.
57 :ivar text_key: key for decoded Element text
58 :ivar attr_prefix: prefix for attribute names
59 :ivar cdata_prefix: prefix for character data parts
60 :ivar etree_element_class: Element class to use
61 :ivar indent: indentation to use for rebuilding XML trees
62 """
63 - def __init__(self, namespaces=None, dict_class=None, list_class=None, text_key='$', attr_prefix='@',
64 cdata_prefix=None, etree_element_class=None, indent=4, **kwargs):
78
80 if name in ('attr_prefix', 'text_key', 'cdata_prefix'):
81 if value is not None and any(c in string.ascii_letters or c == '_' for c in value):
82 raise XMLSchemaValueError('%r cannot includes letters or underscores: %r' % (name, value))
83 elif name == 'attr_prefix':
84 self.ns_prefix = (value or '') + 'xmlns'
85 super(XMLSchemaConverter, self).__setattr__(name, value)
86
87 @property
89 """The converter can ignore some kind of XML data during decoding."""
90 return self.cdata_prefix and self.text_key and self.attr_prefix
91
92 @property
94 """
95 The format of decoded data is without loss of quality. Only lossless formats can
96 always be used to encode to XML data that is strictly conformant to the schema.
97 """
98 return False
99
def copy(self, **kwargs):
    """
    Creates a new converter of the same class, reusing the settings of this
    instance unless they are overridden by keyword arguments.

    :param kwargs: optional overrides. The recognized keys are `namespaces`, \
    `dict_class`, `list_class`, `text_key`, `attr_prefix`, `cdata_prefix`, \
    `etree_element_class` and `indent`; other keys are ignored.
    :return: a new instance of `type(self)`.
    """
    return type(self)(
        namespaces=kwargs.get('namespaces', self._namespaces),
        dict_class=kwargs.get('dict_class', self.dict),
        list_class=kwargs.get('list_class', self.list),
        text_key=kwargs.get('text_key', self.text_key),
        attr_prefix=kwargs.get('attr_prefix', self.attr_prefix),
        cdata_prefix=kwargs.get('cdata_prefix', self.cdata_prefix),
        # Fall back to the instance's own element class, like every other
        # option, so that a copy does not silently lose a custom class.
        etree_element_class=kwargs.get('etree_element_class', self.etree_element_class),
        indent=kwargs.get('indent', self.indent),
    )
111
113 """
114 Creates an iterator for converting decoded attributes to a data structure with
115 appropriate prefixes. If the instance has a non-empty map of namespaces, it registers
116 the mapped URIs and prefixes.
117
118 :param attributes: A sequence or an iterator of couples with the name of \
119 the attribute and the decoded value. Default is `None` (for `simpleType` \
120 elements, that don't have attributes).
121 """
122 if self.attr_prefix is None or not attributes:
123 return
124 elif self.attr_prefix:
125 for name, value in attributes:
126 yield '%s%s' % (self.attr_prefix, self.map_qname(name)), value
127 else:
128 for name, value in attributes:
129 yield self.map_qname(name), value
130
136
def map_content(self, content):
    """
    Generator that converts decoded content items to the names used in the
    output data structure.

    Names that start with '{' are passed through `self.map_qname`, other
    string names are yielded unchanged. A name that cannot be indexed
    (it raises `TypeError`) is treated as a character data part: it is
    yielded with `self.cdata_prefix` prepended, or dropped when that
    prefix is `None`.

    :param content: A sequence or an iterator of tuples with the name of the \
    element, the decoded value and the `XsdElement` instance associated.
    """
    if content:
        qname_mapper = self.map_qname
        for item_name, item_value, xsd_item in content:
            try:
                label = qname_mapper(item_name) if item_name[0] == '{' else item_name
                yield label, item_value, xsd_item
            except TypeError:
                if self.cdata_prefix is None:
                    continue
                yield '%s%s' % (self.cdata_prefix, item_name), item_value, xsd_item
159
def etree_element(self, tag, text=None, children=None, attrib=None, level=0):
    """
    Builds an Element using the arguments, the element class and the
    indent spacing stored in the converter instance.

    :param tag: the Element tag string.
    :param text: the Element text.
    :param children: the list of Element children/subelements.
    :param attrib: a dictionary with Element attributes, or `None` for no attributes.
    :param level: the level related to the encoding process (0 means the root).
    :return: an instance of the Element class set for the converter instance.
    """
    element_class = self.etree_element_class
    if type(element_class) is type(etree_element):
        if attrib is None:
            elem = element_class(tag)
        else:
            elem = element_class(tag, self.dict(attrib))
    else:
        # Element classes of a different kind are called with an explicit
        # namespace map, where the empty prefix is translated to a None key.
        nsmap = {prefix if prefix else None: uri for prefix, uri in self._namespaces.items()}
        # `attrib` defaults to None, and OrderedDict(None) raises TypeError,
        # so a missing attribute map is replaced by an empty one.
        attributes = OrderedDict() if attrib is None else OrderedDict(attrib)
        elem = element_class(tag, attributes, nsmap)

    if children:
        elem.extend(children)
        # Without text, indent the first child one level deeper.
        elem.text = text or '\n' + ' ' * self.indent * (level + 1)
    else:
        elem.text = text

    # The tail is the same with or without children.
    elem.tail = '\n' + ' ' * self.indent * level
    return elem
190
192 """
193 Converts a decoded element data to a data structure.
194
195 :param data: ElementData instance decoded from an Element node.
196 :param xsd_element: the `XsdElement` associated with the decoded data.
197 :param level: the level related to the decoding process (0 means the root).
198 :return: a data structure containing the decoded data.
199 """
200 result_dict = self.dict()
201 if level == 0 and xsd_element.is_global and self:
202 schema_namespaces = set(xsd_element.namespaces.values())
203 result_dict.update(
204 ('%s:%s' % (self.ns_prefix, k) if k else self.ns_prefix, v) for k, v in self.items()
205 if v in schema_namespaces or v == XSI_NAMESPACE
206 )
207
208 if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
209 if data.attributes:
210 result_dict.update(t for t in self.map_attributes(data.attributes))
211 if data.text is not None and data.text != '':
212 result_dict[self.text_key] = data.text
213 return result_dict
214 else:
215 return data.text if data.text != '' else None
216 else:
217 if data.attributes:
218 result_dict.update(t for t in self.map_attributes(data.attributes))
219
220 has_single_group = xsd_element.type.content_type.is_single()
221 list_types = list if self.list is list else (self.list, list)
222 for name, value, xsd_child in self.map_content(data.content):
223 try:
224 result = result_dict[name]
225 except KeyError:
226 if xsd_child is None or has_single_group and xsd_child.is_single():
227 result_dict[name] = value
228 else:
229 result_dict[name] = self.list([value])
230 else:
231 if not isinstance(result, list_types) or not result:
232 result_dict[name] = self.list([result, value])
233 elif isinstance(result[0], list_types) or not isinstance(value, list_types):
234 result.append(value)
235 else:
236 result_dict[name] = self.list([result, value])
237
238 return result_dict if result_dict else None
239
241 """
242 Extracts XML decoded data from a data structure for encoding into an ElementTree.
243
244 :param obj: the decoded object.
245 :param xsd_element: the `XsdElement` associated to the decoded data structure.
246 :param level: the level related to the encoding process (0 means the root).
247 :return: an ElementData instance.
248 """
249 tag = xsd_element.qualified_name if level == 0 else xsd_element.name
250
251 if not isinstance(obj, (self.dict, dict)):
252 if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
253 return ElementData(tag, obj, None, self.dict())
254 else:
255 return ElementData(tag, None, obj, self.dict())
256
257 unmap_qname = self.unmap_qname
258 unmap_attribute_qname = self._unmap_attribute_qname
259 text_key = self.text_key
260 attr_prefix = self.attr_prefix
261 ns_prefix = self.ns_prefix
262 cdata_prefix = self.cdata_prefix
263
264 text = None
265 content = []
266 attributes = self.dict()
267 for name, value in obj.items():
268 if text_key and name == text_key:
269 text = obj[text_key]
270 elif (cdata_prefix and name.startswith(cdata_prefix)) or \
271 name[0].isdigit() and cdata_prefix == '':
272 index = int(name[len(cdata_prefix):])
273 content.append((index, value))
274 elif name == ns_prefix:
275 self[''] = value
276 elif name.startswith('%s:' % ns_prefix):
277 self[name[len(ns_prefix)+1:]] = value
278 elif attr_prefix and name.startswith(attr_prefix):
279 name = name[len(attr_prefix):]
280 attributes[unmap_attribute_qname(name)] = value
281 elif not isinstance(value, (self.list, list)) or not value:
282 content.append((unmap_qname(name), value))
283 elif isinstance(value[0], (self.dict, dict, self.list, list)):
284 ns_name = unmap_qname(name)
285 for item in value:
286 content.append((ns_name, item))
287 else:
288 ns_name = unmap_qname(name)
289 for xsd_child in xsd_element.type.content_type.iter_elements():
290 matched_element = xsd_child.match(ns_name, self.get(''))
291 if matched_element is not None:
292 if matched_element.type.is_list():
293 content.append((ns_name, value))
294 else:
295 for item in value:
296 content.append((ns_name, item))
297 break
298 else:
299 if attr_prefix == '' and ns_name not in attributes:
300 for xsd_attribute in xsd_element.attributes.values():
301 if xsd_attribute.is_matching(ns_name):
302 attributes[ns_name] = value
303 break
304 else:
305 content.append((ns_name, value))
306 else:
307 content.append((ns_name, value))
308
309 return ElementData(tag, text, content, attributes)
310
313 """
314 XML Schema based converter class for Parker convention.
315
316 ref: http://wiki.open311.org/JSON_and_XML_Conversion/#the-parker-convention
317
318 :param namespaces: Map from namespace prefixes to URI.
319 :param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
320 Python 3.6+ or `OrderedDict` for previous versions.
321 :param list_class: List class to use for decoded data. Default is `list`.
322 :param preserve_root: If `True` the root element will be preserved. By default \
323 the Parker convention removes the document root element, returning only the value.
324 """
325 - def __init__(self, namespaces=None, dict_class=None, list_class=None, preserve_root=False, **kwargs):
331
336
337 @property
340
def copy(self, **kwargs):
    """
    Creates a new converter of the same class, reusing the settings of this
    instance unless they are overridden by keyword arguments.

    :param kwargs: optional overrides. The recognized keys are `namespaces`, \
    `dict_class`, `list_class`, `preserve_root`, `etree_element_class` and \
    `indent`; other keys are ignored.
    :return: a new instance of `type(self)`.
    """
    return type(self)(
        namespaces=kwargs.get('namespaces', self._namespaces),
        dict_class=kwargs.get('dict_class', self.dict),
        list_class=kwargs.get('list_class', self.list),
        preserve_root=kwargs.get('preserve_root', self.preserve_root),
        # Fall back to the instance's own element class, like every other
        # option, so that a copy does not silently lose a custom class.
        etree_element_class=kwargs.get('etree_element_class', self.etree_element_class),
        indent=kwargs.get('indent', self.indent),
    )
350
352 map_qname = self.map_qname
353 preserve_root = self.preserve_root
354 if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
355 if preserve_root:
356 return self.dict([(map_qname(data.tag), data.text)])
357 else:
358 return data.text if data.text != '' else None
359 else:
360 result_dict = self.dict()
361 list_types = list if self.list is list else (self.list, list)
362 for name, value, xsd_child in self.map_content(data.content):
363 if preserve_root:
364 try:
365 if len(value) == 1:
366 value = value[name]
367 except (TypeError, KeyError):
368 pass
369
370 try:
371 result_dict[name].append(value)
372 except KeyError:
373 if isinstance(value, list_types):
374 result_dict[name] = self.list([value])
375 else:
376 result_dict[name] = value
377 except AttributeError:
378 result_dict[name] = self.list([result_dict[name], value])
379
380 for k, v in result_dict.items():
381 if isinstance(v, (self.list, list)) and len(v) == 1:
382 value = v.pop()
383 v.extend(value)
384
385 if preserve_root:
386 return self.dict([(map_qname(data.tag), result_dict)])
387 else:
388 return result_dict if result_dict else None
389
391 if not isinstance(obj, (self.dict, dict)):
392 if obj == '':
393 obj = None
394 if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
395 return ElementData(xsd_element.name, obj, None, self.dict())
396 else:
397 return ElementData(xsd_element.name, None, obj, self.dict())
398 else:
399 unmap_qname = self.unmap_qname
400 if not obj:
401 return ElementData(xsd_element.name, None, None, self.dict())
402 elif self.preserve_root:
403 try:
404 items = obj[self.map_qname(xsd_element.name)]
405 except KeyError:
406 return ElementData(xsd_element.name, None, None, self.dict())
407 else:
408 items = obj
409
410 try:
411 content = []
412 for name, value in obj.items():
413 ns_name = unmap_qname(name)
414 if not isinstance(value, (self.list, list)) or not value:
415 content.append((ns_name, value))
416 elif any(isinstance(v, (self.list, list)) for v in value):
417 for item in value:
418 content.append((ns_name, item))
419 else:
420 for xsd_child in xsd_element.type.content_type.iter_elements():
421 matched_element = xsd_child.match(ns_name, self.get(''))
422 if matched_element is not None:
423 if matched_element.type.is_list():
424 content.append((ns_name, value))
425 else:
426 for item in value:
427 content.append((ns_name, item))
428 break
429 else:
430 for item in value:
431 content.append((ns_name, item))
432
433 except AttributeError:
434 return ElementData(xsd_element.name, items, None, self.dict())
435 else:
436 return ElementData(xsd_element.name, None, content, self.dict())
437
440 """
441 XML Schema based converter class for Badgerfish convention.
442
443 ref: http://www.sklar.com/badgerfish/
444 ref: http://badgerfish.ning.com/
445
446 :param namespaces: Map from namespace prefixes to URI.
447 :param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
448 Python 3.6+ or `OrderedDict` for previous versions.
449 :param list_class: List class to use for decoded data. Default is `list`.
450 """
451 - def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs):
456
462
463 @property
466
468 dict_class = self.dict
469
470 tag = self.map_qname(data.tag)
471 has_local_root = not len(self)
472 result_dict = dict_class([t for t in self.map_attributes(data.attributes)])
473 if has_local_root:
474 result_dict['@xmlns'] = dict_class()
475
476 if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
477 if data.text is not None and data.text != '':
478 result_dict[self.text_key] = data.text
479 else:
480 has_single_group = xsd_element.type.content_type.is_single()
481 list_types = list if self.list is list else (self.list, list)
482 for name, value, xsd_child in self.map_content(data.content):
483 try:
484 if '@xmlns' in value:
485 self.transfer(value['@xmlns'])
486 if not value['@xmlns']:
487 del value['@xmlns']
488 elif '@xmlns' in value[name]:
489 self.transfer(value[name]['@xmlns'])
490 if not value[name]['@xmlns']:
491 del value[name]['@xmlns']
492 if len(value) == 1:
493 value = value[name]
494 except (TypeError, KeyError):
495 pass
496
497 if value is None:
498 value = self.dict()
499
500 try:
501 result = result_dict[name]
502 except KeyError:
503 if xsd_child is None or has_single_group and xsd_child.is_single():
504 result_dict[name] = value
505 else:
506 result_dict[name] = self.list([value])
507 else:
508 if not isinstance(result, list_types) or not result:
509 result_dict[name] = self.list([result, value])
510 elif isinstance(result[0], list_types) or not isinstance(value, list_types):
511 result.append(value)
512 else:
513 result_dict[name] = self.list([result, value])
514
515 if has_local_root:
516 if self:
517 result_dict['@xmlns'].update(self)
518 else:
519 del result_dict['@xmlns']
520 return dict_class([(tag, result_dict)])
521 else:
522 return dict_class([('@xmlns', dict_class(self)), (tag, result_dict)])
523
525 map_qname = self.map_qname
526 unmap_qname = self.unmap_qname
527 unmap_attribute_qname = self._unmap_attribute_qname
528 tag = xsd_element.qualified_name if level == 0 else xsd_element.name
529
530 try:
531 self.update(obj['@xmlns'])
532 except KeyError:
533 pass
534
535 try:
536 element_data = obj[map_qname(xsd_element.name)]
537 except KeyError:
538 element_data = obj
539
540 text_key = self.text_key
541 attr_prefix = self.attr_prefix
542 cdata_prefix = self.cdata_prefix
543 text = None
544 content = []
545 attributes = self.dict()
546 for name, value in element_data.items():
547 if name == '@xmlns':
548 continue
549 elif text_key and name == text_key:
550 text = element_data[text_key]
551 elif (cdata_prefix and name.startswith(cdata_prefix)) or \
552 name[0].isdigit() and cdata_prefix == '':
553 index = int(name[len(cdata_prefix):])
554 content.append((index, value))
555 elif attr_prefix and name.startswith(attr_prefix):
556 name = name[len(attr_prefix):]
557 attributes[unmap_attribute_qname(name)] = value
558 elif not isinstance(value, (self.list, list)) or not value:
559 content.append((unmap_qname(name), value))
560 elif isinstance(value[0], (self.dict, dict, self.list, list)):
561 ns_name = unmap_qname(name)
562 for item in value:
563 content.append((ns_name, item))
564 else:
565 ns_name = unmap_qname(name)
566 for xsd_child in xsd_element.type.content_type.iter_elements():
567 matched_element = xsd_child.match(ns_name, self.get(''))
568 if matched_element is not None:
569 if matched_element.type.is_list():
570 content.append((ns_name, value))
571 else:
572 for item in value:
573 content.append((ns_name, item))
574 break
575 else:
576 if attr_prefix == '' and ns_name not in attributes:
577 for xsd_attribute in xsd_element.attributes.values():
578 if xsd_attribute.is_matching(ns_name):
579 attributes[ns_name] = value
580 break
581 else:
582 content.append((ns_name, value))
583 else:
584 content.append((ns_name, value))
585
586 return ElementData(tag, text, content, attributes)
587
590 """
591 XML Schema based converter class for Abdera convention.
592
593 ref: http://wiki.open311.org/JSON_and_XML_Conversion/#the-abdera-convention
594 ref: https://cwiki.apache.org/confluence/display/ABDERA/JSON+Serialization
595
596 :param namespaces: Map from namespace prefixes to URI.
597 :param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
598 Python 3.6+ or `OrderedDict` for previous versions.
599 :param list_class: List class to use for decoded data. Default is `list`.
600 """
601 - def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs):
606
611
612 @property
615
617 if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
618 children = data.text if data.text is not None and data.text != '' else None
619 else:
620 children = self.dict()
621 for name, value, xsd_child in self.map_content(data.content):
622 if value is None:
623 value = self.list()
624
625 try:
626 children[name].append(value)
627 except KeyError:
628 if isinstance(value, (self.list, list)) and value:
629 children[name] = self.list([value])
630 else:
631 children[name] = value
632 except AttributeError:
633 children[name] = self.list([children[name], value])
634 if not children:
635 children = None
636
637 if data.attributes:
638 if children != []:
639 return self.dict([
640 ('attributes', self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])),
641 ('children', self.list([children]) if children is not None else self.list())
642 ])
643 else:
644 return self.dict([
645 ('attributes', self.dict([(k, v) for k, v in self.map_attributes(data.attributes)])),
646 ])
647 else:
648 return children if children is not None else self.list()
649
651 tag = xsd_element.qualified_name if level == 0 else xsd_element.name
652
653 if not isinstance(obj, (self.dict, dict)):
654 if obj == []:
655 obj = None
656 return ElementData(tag, obj, None, self.dict())
657 else:
658 unmap_qname = self.unmap_qname
659 unmap_attribute_qname = self._unmap_attribute_qname
660 attributes = self.dict()
661 try:
662 attributes.update([(unmap_attribute_qname(k), v) for k, v in obj['attributes'].items()])
663 except KeyError:
664 children = obj
665 else:
666 children = obj.get('children', [])
667
668 if isinstance(children, (self.dict, dict)):
669 children = [children]
670 elif children and not isinstance(children[0], (self.dict, dict)):
671 if len(children) > 1:
672 raise XMLSchemaValueError("Wrong format")
673 else:
674 return ElementData(tag, children[0], None, attributes)
675
676 content = []
677 for child in children:
678 for name, value in child.items():
679 if not isinstance(value, (self.list, list)) or not value:
680 content.append((unmap_qname(name), value))
681 elif isinstance(value[0], (self.dict, dict, self.list, list)):
682 ns_name = unmap_qname(name)
683 for item in value:
684 content.append((ns_name, item))
685 else:
686 ns_name = unmap_qname(name)
687 for xsd_child in xsd_element.type.content_type.iter_elements():
688 matched_element = xsd_child.match(ns_name, self.get(''))
689 if matched_element is not None:
690 if matched_element.type.is_list():
691 content.append((ns_name, value))
692 else:
693 for item in value:
694 content.append((ns_name, item))
695 break
696 else:
697 content.append((ns_name, value))
698
699 return ElementData(tag, None, content, attributes)
700
703 """
704 XML Schema based converter class for JsonML (JSON Mark-up Language) convention.
705
706 ref: http://www.jsonml.org/
707 ref: https://www.ibm.com/developerworks/library/x-jsonml/
708
709 :param namespaces: Map from namespace prefixes to URI.
710 :param dict_class: Dictionary class to use for decoded data. Default is `dict` for \
711 Python 3.6+ or `OrderedDict` for previous versions.
712 :param list_class: List class to use for decoded data. Default is `list`.
713 """
714 - def __init__(self, namespaces=None, dict_class=None, list_class=None, **kwargs):
719
724
725 @property
728
729 @property
732
751
753 unmap_qname = self.unmap_qname
754 attributes = self.dict()
755 if not isinstance(obj, (self.list, list)) or not obj:
756 raise XMLSchemaValueError("Wrong data format, a not empty list required: %r." % obj)
757
758 data_len = len(obj)
759 if data_len == 1:
760 if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')):
761 raise XMLSchemaValueError("Unmatched tag")
762 return ElementData(xsd_element.name, None, None, attributes)
763
764 unmap_attribute_qname = self._unmap_attribute_qname
765 try:
766 for k, v in obj[1].items():
767 if k == 'xmlns':
768 self[''] = v
769 elif k.startswith('xmlns:'):
770 self[k.split('xmlns:')[1]] = v
771 else:
772 attributes[unmap_attribute_qname(k)] = v
773 except AttributeError:
774 content_index = 1
775 else:
776 content_index = 2
777
778 if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')):
779 raise XMLSchemaValueError("Unmatched tag")
780
781 if data_len <= content_index:
782 return ElementData(xsd_element.name, None, [], attributes)
783 elif data_len == content_index + 1 and (xsd_element.type.is_simple()
784 or xsd_element.type.has_simple_content()):
785 return ElementData(xsd_element.name, obj[content_index], [], attributes)
786 else:
787 cdata_num = iter(range(1, data_len))
788 list_types = list if self.list is list else (self.list, list)
789 content = [
790 (unmap_qname(e[0]), e) if isinstance(e, list_types) else (next(cdata_num), e)
791 for e in obj[content_index:]
792 ]
793 return ElementData(xsd_element.name, None, content, attributes)
794