Package xmlschema_acue ::
Package validators ::
Module models
1
2
3
4
5
6
7
8
9
10
11 """
12 This module contains classes and functions for processing XSD content models.
13 """
14 from __future__ import unicode_literals
15 from collections import Counter
16
17 from xmlschema_acue.compat import PY3, MutableSequence
18 from xmlschema_acue.exceptions import XMLSchemaValueError
19 from xmlschema_acue.validators.exceptions import XMLSchemaModelError, XMLSchemaModelDepthError
20 from xmlschema_acue.validators.xsdbase import ParticleMixin
21
22 MAX_MODEL_DEPTH = 15
23 """Limit depth for safe visiting of models"""
24
25 XSD_GROUP_MODELS = {'sequence', 'choice', 'all'}
26
27
28 -class ModelGroup(MutableSequence, ParticleMixin):
29 """
30 Class for XSD model group particles. This class implements only model related methods,
31 schema element parsing and validation methods are implemented in derived classes.
32 """
37
39 return '%s(model=%r, occurs=%r)' % (self.__class__.__name__, self.model, self.occurs)
40
41
44
46 assert isinstance(item, (tuple, ParticleMixin)), "Items must be tuples or XSD particles"
47 self._group[i] = item
48
51
53 return len(self._group)
54
56 assert isinstance(item, (tuple, ParticleMixin)), "Items must be tuples or XSD particles"
57 self._group.insert(i, item)
58
69
72
78
81
83 """
84 Returns `True` if the group may be eliminated without affecting the model, `False` otherwise.
85 A group is pointless if one of these conditions holds:
86
87 - the group is empty
88 - minOccurs == maxOccurs == 1 and the group has one child
89 - minOccurs == maxOccurs == 1 and the group and its parent have a sequence model
90 - minOccurs == maxOccurs == 1 and the group and its parent have a choice model
91
92 Ref: https://www.w3.org/TR/2004/REC-xmlschema-1-20041028/#coss-particle
93
94 :param parent: effective parent of the model group.
95 """
96 if not self:
97 return True
98 elif self.min_occurs != 1 or self.max_occurs != 1:
99 return False
100 elif len(self) == 1:
101 return True
102 elif not isinstance(parent, ModelGroup):
103 return False
104 elif self.model == 'sequence' and parent.model != 'sequence':
105 return False
106 elif self.model == 'choice' and parent.model != 'choice':
107 return False
108 else:
109 return True
110
141
143 """
144 A generator function iterating elements and groups of a model group. Skips pointless groups,
145 iterating deeper through them. Raises `XMLSchemaModelDepthError` if the argument *depth* is
146 over `MAX_MODEL_DEPTH` value.
147
148 :param depth: guard against model nesting bombs, incremented at each deeper level of recursion.
149 """
150 if depth > MAX_MODEL_DEPTH:
151 raise XMLSchemaModelDepthError(self)
152 for item in self:
153 if not isinstance(item, ModelGroup):
154 yield item
155 elif not item.is_pointless(parent=self):
156 yield item
157 else:
158 for obj in item.iter_model(depth+1):
159 yield obj
160
162 """
163 A generator function iterating model's elements. Raises `XMLSchemaModelDepthError` if the
164 argument *depth* is over `MAX_MODEL_DEPTH` value.
165
166 :param depth: guard against model nesting bombs, incremented at each deeper level of recursion.
167 """
168 if depth > MAX_MODEL_DEPTH:
169 raise XMLSchemaModelDepthError(self)
170 for item in self:
171 if isinstance(item, ModelGroup):
172 for e in item.iter_elements(depth+1):
173 yield e
174 else:
175 yield item
176
185
187 """
188 Checks if the model group is deterministic. Type consistency of same-named elements and the Unique
189 Particle Attribution constraint are checked. Raises an `XMLSchemaModelError` at the first violated constraint.
190 """
191 def safe_iter_path(group, depth):
# Nested generator: walks *group* and yields every item that is not a ModelGroup,
# descending recursively into nested groups.
# While a nested group is being walked it is appended to `current_path` (a list
# owned by the enclosing function), so at each yield `current_path` holds the
# chain of groups entered so far.
# Raises XMLSchemaModelDepthError when *depth* exceeds MAX_MODEL_DEPTH.
# NOTE(review): indentation was lost in this listing; `current_path.pop()` is
# presumably executed once, after the inner loop over the nested group -- confirm.
192 if depth > MAX_MODEL_DEPTH:
193 raise XMLSchemaModelDepthError(group)
194 for item in group:
195 if isinstance(item, ModelGroup):
# Record the nested group on the path before visiting its items.
196 current_path.append(item)
197 for _item in safe_iter_path(item, depth + 1):
198 yield _item
# Leave the nested group: drop it from the path again.
199 current_path.pop()
200 else:
201 yield item
202
203 paths = {}
204 current_path = [self]
205 for e in safe_iter_path(self, 0):
206 for pe, previous_path in paths.values():
207 if pe.name == e.name and pe.name is not None and pe.type is not e.type:
208 raise XMLSchemaModelError(
209 self, "The model has elements with the same name %r but a different type" % e.name
210 )
211 elif not pe.overlap(e):
212 continue
213 elif pe is not e and pe.parent is e.parent:
214 if pe.parent.model in {'all', 'choice'}:
215 msg = "{!r} and {!r} overlap and are in the same {!r} group"
216 raise XMLSchemaModelError(self, msg.format(pe, e, pe.parent.model))
217 elif pe.min_occurs == pe.max_occurs:
218 continue
219
220 if not distinguishable_paths(previous_path + [pe], current_path + [e]):
221 raise XMLSchemaModelError(
222 self, "Unique Particle Attribution violation between {!r} and {!r}".format(pe, e)
223 )
224 paths[e.name] = e, current_path[:]
225
228 """
229 Checks if two model paths are distinguishable in a deterministic way, without looking forward
230 or backtracking. The arguments are lists containing the paths from the base group of the model to
231 a pair of leaf elements. Returns `True` if there is a deterministic separation between the paths,
232 `False` if the paths are ambiguous.
233 """
234 e1, e2 = path1[-1], path2[-1]
235
236 for k, e in enumerate(path1):
237 if e not in path2:
238 depth = k - 1
239 break
240 else:
241 depth = 0
242
243 if path1[depth].max_occurs == 0:
244 return True
245
246 univocal1 = univocal2 = True
247 if path1[depth].model == 'sequence':
248 idx1 = path1[depth].index(path1[depth + 1])
249 idx2 = path2[depth].index(path2[depth + 1])
250 before1 = any(not e.is_emptiable() for e in path1[depth][:idx1])
251 after1 = before2 = any(not e.is_emptiable() for e in path1[depth][idx1 + 1:idx2])
252 after2 = any(not e.is_emptiable() for e in path1[depth][idx2 + 1:])
253 else:
254 before1 = after1 = before2 = after2 = False
255
256 for k in range(depth + 1, len(path1) - 1):
257 univocal1 &= path1[k].is_univocal()
258 if path1[k].model == 'sequence':
259 idx = path1[k].index(path1[k + 1])
260 before1 |= any(not e.is_emptiable() for e in path1[k][:idx])
261 after1 |= any(not e.is_emptiable() for e in path1[k][idx + 1:])
262
263 for k in range(depth + 1, len(path2) - 1):
264 univocal2 &= path2[k].is_univocal()
265 if path2[k].model == 'sequence':
266 idx = path2[k].index(path2[k + 1])
267 before2 |= any(not e.is_emptiable() for e in path2[k][:idx])
268 after2 |= any(not e.is_emptiable() for e in path2[k][idx + 1:])
269
270 if path1[depth].model != 'sequence':
271 return before1 and before2 or \
272 (before1 and (univocal1 and e1.is_univocal() or after1 or path1[depth].max_occurs == 1)) or \
273 (before2 and (univocal2 and e2.is_univocal() or after2 or path2[depth].max_occurs == 1))
274 elif path1[depth].max_occurs == 1:
275 return before2 or (before1 or univocal1) and (e1.is_univocal() or after1)
276 else:
277 return (before2 or (before1 or univocal1) and (e1.is_univocal() or after1)) and \
278 (before1 or (before2 or univocal2) and (e2.is_univocal() or after2))
279
282 """
283 A visitor design pattern class that can be used for validating XML data related to an XSD
284 model group. The model is visited using externally supplied match information,
285 counting the occurrences and yielding tuples when a model item violates its occurrence constraints.
286 The visit ends by setting the current element to `None`.
287
288 :param root: the root ModelGroup instance of the model.
289 :ivar occurs: the Counter instance for keeping track of occurrences of XSD elements and groups.
290 :ivar element: the current XSD element, initialized to the first element of the model.
291 :ivar broken: a boolean value that records if the model is still usable.
292 :ivar group: the current XSD model group, initialized to *root* argument.
293 :ivar iterator: the current XSD group iterator.
294 :ivar items: the current XSD group unmatched items.
295 :ivar match: if the XSD group has an effective item match.
296 """
298 self.root = root
299 self.occurs = Counter()
300 self._subgroups = []
301 self.element = None
302 self.broken = False
303 self.group, self.iterator, self.items, self.match = root, iter(root), root[::-1], False
304 self._start()
305
307
308 return unicode(self).encode("utf-8")
309
312
313 if PY3:
314 __str__ = __unicode__
315
317 return '%s(root=%r)' % (self.__class__.__name__, self.root)
318
319
321 return self._subgroups[i]
322
324 self._subgroups[i] = item
325
327 del self._subgroups[i]
328
330 return len(self._subgroups)
331
333 self._subgroups.insert(i, item)
334
336 del self._subgroups[:]
337 self.occurs.clear()
338 self.element = None
339 self.broken = False
340 self.group, self.iterator, self.items, self.match = self.root, iter(self.root), self.root[::-1], False
341
343 while True:
344 item = next(self.iterator, None)
345 if item is None or not isinstance(item, ModelGroup):
346 self.element = item
347 break
348 elif item:
349 self.append((self.group, self.iterator, self.items, self.match))
350 self.group, self.iterator, self.items, self.match = item, iter(item), item[::-1], False
351
352 @property
365
367 self.clear()
368 self._start()
369
371 while self.element is not None:
372 for e in self.advance():
373 yield e
374
376 """
377 Generator function that advances to the next element. Yields tuples with
378 particle information when an occurrence violation is found.
379
380 :param match: provides current element match.
381 """
382 def stop_item(item):
# Closure of the enclosing generator: it reads and updates the visitor state
# (self.group, self.iterator, self.items, self.match) and the `occurs` counter
# bound in the enclosing scope.
# NOTE(review): this listing lost its indentation, so the nesting described in the
# comments below is inferred from the if/elif/else keywords -- confirm against the module.
383 """
384 Stops element or group matching, incrementing current group counter.
385
386 :return: `True` if the item has violated the minimum occurrences for itself \
387 or for the current group, `False` otherwise.
388 """
# A finished group hands control back to its parent: restore the state tuple
# that was pushed onto the visitor when the group was entered.
389 if isinstance(item, ModelGroup):
390 self.group, self.iterator, self.items, self.match = self.pop()
391
392 item_occurs = occurs[item]
393 model = self.group.model
# Case 1: the item occurred at least once.
394 if item_occurs:
395 self.match = True
# choice: reset the item counter, count one occurrence of the group and
# restart the group's iterator.
396 if model == 'choice':
397 occurs[item] = 0
398 occurs[self.group] += 1
399 self.iterator, self.match = iter(self.group), False
400 else:
# 'all' removes the matched item from the pending list by value; any other
# model pops the last pending item.
401 if model == 'all':
402 self.items.remove(item)
403 else:
404 self.items.pop()
# No pending items left: count one more occurrence of the group.
405 if not self.items:
406 self.occurs[self.group] += 1
# Result of the item's own minimum-occurrence check.
407 return item.is_missing(item_occurs)
408
# Case 2: the item never occurred and the current group is a sequence.
409 elif model == 'sequence':
# Something in the group already matched: drop the item and report a violation
# unless the item is emptiable.
410 if self.match:
411 self.items.pop()
412 if not self.items:
413 occurs[self.group] += 1
414 return not item.is_emptiable()
# Nothing matched yet, but the item may be omitted: just skip it.
415 elif item.is_emptiable():
416 self.items.pop()
417 return False
# Stop the whole group instead when it already has enough occurrences or when
# the visitor stack is non-empty (`self` is truthy), i.e. the group is nested.
418 elif self.group.min_occurs <= occurs[self.group] or self:
419 return stop_item(self.group)
# Otherwise (presumably the root group, still short of occurrences): skip the
# item and report the violation.
420 else:
421 self.items.pop()
422 return True
423
424 element, occurs = self.element, self.occurs
425 if element is None:
426 raise XMLSchemaValueError("cannot advance, %r is ended!" % self)
427
428 if match:
429 occurs[element] += 1
430 self.match = True
431 if not element.is_over(occurs[element]):
432 return
433 try:
434 if stop_item(element):
435 yield element, occurs[element], [element]
436
437 while True:
438 while self.group.is_over(occurs[self.group]):
439 stop_item(self.group)
440
441 obj = next(self.iterator, None)
442 if obj is None:
443 if not self.match:
444 if self.group.model == 'all' and all(e.min_occurs == 0 for e in self.items):
445 occurs[self.group] += 1
446 group, expected = self.group, self.items
447 if stop_item(group) and expected:
448 yield group, occurs[group], self.expected
449 elif not self.items:
450 self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False
451 elif self.group.model == 'all':
452 self.iterator, self.match = iter(self.items), False
453 elif all(e.min_occurs == 0 for e in self.items):
454 self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False
455 occurs[self.group] += 1
456
457 elif not isinstance(obj, ModelGroup):
458 self.element, occurs[obj] = obj, 0
459 return
460
461 elif obj:
462 self.append((self.group, self.iterator, self.items, self.match))
463 self.group, self.iterator, self.items, self.match = obj, iter(obj), obj[::-1], False
464 occurs[obj] = 0
465
466 except IndexError:
467 self.element = None
468 if self.group.is_missing(occurs[self.group]) and self.items:
469 yield self.group, occurs[self.group], self.expected
470