Package xmlschema_acue :: Package validators :: Module models

Source Code for Module xmlschema_acue.validators.models

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright (c), 2016-2019, SISSA (International School for Advanced Studies). 
  4  # All rights reserved. 
  5  # This file is distributed under the terms of the MIT License. 
  6  # See the file 'LICENSE' in the root directory of the present 
  7  # distribution, or http://opensource.org/licenses/MIT. 
  8  # 
  9  # @author Davide Brunato <brunato@sissa.it> 
 10  # 
 11  """ 
 12  This module contains classes and functions for processing XSD content models. 
 13  """ 
 14  from __future__ import unicode_literals 
 15  from collections import Counter 
 16   
 17  from xmlschema_acue.compat import PY3, MutableSequence 
 18  from xmlschema_acue.exceptions import XMLSchemaValueError 
 19  from xmlschema_acue.validators.exceptions import XMLSchemaModelError, XMLSchemaModelDepthError 
 20  from xmlschema_acue.validators.xsdbase import ParticleMixin 
 21   
# Upper bound for the recursion depth reached while visiting nested model groups.
# The visiting functions of this module compare their *depth* argument with this value.
MAX_MODEL_DEPTH = 15
"""Limit depth for safe visiting of models"""

# The model names accepted for a model group (checked when a group model is assigned).
XSD_GROUP_MODELS = {'sequence', 'choice', 'all'}
class ModelGroup(MutableSequence, ParticleMixin):
    """
    Class for XSD model group particles. This class implements only model related methods,
    schema element parsing and validation methods are implemented in derived classes.

    The group is a mutable sequence: its items are stored in the private list `_group`
    and must be tuples or `ParticleMixin` instances.

    :param model: the model of the group, one of the names in `XSD_GROUP_MODELS`.
    """
    def __init__(self, model):
        assert model in XSD_GROUP_MODELS, "Not a valid value for 'model'"
        self._group = []
        self.model = model

    def __repr__(self):
        return '%s(model=%r, occurs=%r)' % (self.__class__.__name__, self.model, self.occurs)

    # Implements the abstract methods of MutableSequence
    def __getitem__(self, i):
        return self._group[i]

    def __setitem__(self, i, item):
        assert isinstance(item, (tuple, ParticleMixin)), "Items must be tuples or XSD particles"
        self._group[i] = item

    def __delitem__(self, i):
        del self._group[i]

    def __len__(self):
        return len(self._group)

    def insert(self, i, item):
        assert isinstance(item, (tuple, ParticleMixin)), "Items must be tuples or XSD particles"
        self._group.insert(i, item)

    def __setattr__(self, name, value):
        """
        Validates the assignments to the attributes 'model' and '_group', then stores the value.

        :raises XMLSchemaValueError: if *value* is not a valid model name, if the assignment \
        would change an already set model that is not 'all', or if '_group' is assigned with \
        items that are neither tuples nor `ParticleMixin` instances.
        """
        if name == 'model' and value is not None:
            if value not in XSD_GROUP_MODELS:
                raise XMLSchemaValueError("invalid model group %r." % value)

            # The attribute may not exist yet (first assignment done by __init__ when no
            # class-level default is provided), so it has to be read with a fallback.
            current = getattr(self, 'model', None)
            if current is not None and value != current and current != 'all':
                raise XMLSchemaValueError("cannot change group model from %r to %r" % (current, value))
        elif name == '_group':
            if not all(isinstance(item, (tuple, ParticleMixin)) for item in value):
                raise XMLSchemaValueError("XsdGroup's items must be tuples or ParticleMixin instances.")
        super(ModelGroup, self).__setattr__(name, value)

    def clear(self):
        """Removes all the items from the group, keeping the same internal list."""
        del self._group[:]

    def is_emptiable(self):
        """
        Returns `True` if the group has `min_occurs == 0`, if it has no items, or if its
        items are emptiable: at least one of them for a 'choice' model, all of them otherwise.
        """
        if self.min_occurs == 0 or not self:
            return True

        # A generator lets any()/all() stop at the first decisive item.
        emptiable_items = (item.is_emptiable() for item in self)
        if self.model == 'choice':
            return any(emptiable_items)
        return all(emptiable_items)

    def is_empty(self):
        """Returns `True` if the group has no items or if its `max_occurs` is 0."""
        return not self._group or self.max_occurs == 0

    def is_pointless(self, parent):
        """
        Returns `True` if the group may be eliminated without affecting the model, `False` otherwise.
        A group is pointless if one of those conditions is verified:

         - the group is empty
         - minOccurs == maxOccurs == 1 and the group has one child
         - minOccurs == maxOccurs == 1 and the group and its parent have a sequence model
         - minOccurs == maxOccurs == 1 and the group and its parent have a choice model

        Ref: https://www.w3.org/TR/2004/REC-xmlschema-1-20041028/#coss-particle

        :param parent: effective parent of the model group.
        """
        if not self:
            return True
        if self.min_occurs != 1 or self.max_occurs != 1:
            return False
        if len(self) == 1:
            return True
        if not isinstance(parent, ModelGroup):
            return False
        if self.model in ('sequence', 'choice'):
            # A sequence/choice is pointless only inside a parent with the same model.
            return parent.model == self.model
        return True

    def has_occurs_restriction(self, other):
        """
        Compares the occurrence bounds of the group with the ones of another particle.
        An empty group always returns `True`. If *other* is a `ModelGroup` the check is
        delegated to the base class implementation. Otherwise the effective bounds of the
        group are computed from the bounds of its items (minimum/maximum of the items for
        a 'choice' model, sum of the items for the other models, multiplied by the bounds
        of the group) and they have to be within the bounds of *other*.

        :param other: the particle to compare with; it has to provide the attributes \
        `min_occurs` and `max_occurs`, where a `max_occurs` of `None` is treated as unbounded.
        :return: `True` if the effective minimum is not lower than `other.min_occurs` and \
        the effective maximum does not exceed `other.max_occurs`, `False` otherwise.
        """
        if not self:
            return True
        elif isinstance(other, ModelGroup):
            return super(ModelGroup, self).has_occurs_restriction(other)

        # Group particle compared to element particle
        is_choice = self.model == 'choice'
        unbounded = self.max_occurs is None or any(e.max_occurs is None for e in self)
        if unbounded and other.max_occurs is not None:
            return False

        min_combiner = min if is_choice else sum
        if self.min_occurs * min_combiner(e.min_occurs for e in self) < other.min_occurs:
            return False
        elif unbounded or other.max_occurs is None:
            return True

        max_combiner = max if is_choice else sum
        return self.max_occurs * max_combiner(e.max_occurs for e in self) <= other.max_occurs

    def iter_model(self, depth=0):
        """
        A generator function iterating elements and groups of a model group. Skips pointless groups,
        iterating deeper through them. Raises `XMLSchemaModelDepthError` if the argument *depth* is
        over `MAX_MODEL_DEPTH` value.

        :param depth: guard for protect model nesting bombs, incremented at each deepest recursion.
        """
        if depth > MAX_MODEL_DEPTH:
            raise XMLSchemaModelDepthError(self)
        for item in self:
            if isinstance(item, ModelGroup) and item.is_pointless(parent=self):
                # Pointless groups are replaced by their own content.
                for obj in item.iter_model(depth + 1):
                    yield obj
            else:
                yield item

    def iter_elements(self, depth=0):
        """
        A generator function iterating model's elements. Raises `XMLSchemaModelDepthError` if the
        argument *depth* is over `MAX_MODEL_DEPTH` value.

        :param depth: guard for protect model nesting bombs, incremented at each deepest recursion.
        """
        if depth > MAX_MODEL_DEPTH:
            raise XMLSchemaModelDepthError(self)
        for item in self:
            if isinstance(item, ModelGroup):
                for e in item.iter_elements(depth + 1):
                    yield e
            else:
                yield item

    def iter_subelements(self, depth=0):
        """
        A generator function iterating the non-group items of the group and of its nested
        groups. Unlike `iter_elements` it doesn't raise an error when *depth* is over
        `MAX_MODEL_DEPTH`: groups nested deeper than the limit are silently skipped.

        :param depth: the current nesting depth, incremented at each recursion.
        """
        if depth <= MAX_MODEL_DEPTH:
            for item in self:
                if isinstance(item, ModelGroup):
                    for e in item.iter_subelements(depth + 1):
                        yield e
                else:
                    yield item

    def check_model(self):
        """
        Checks if the model group is deterministic. Types matching of same elements and Unique Particle
        Attribution Constraint are checked. Raises an `XMLSchemaModelError` at first violated constraint.
        """
        def safe_iter_path(group, depth):
            # Yields the leaf items of the model, keeping *current_path* aligned with
            # the chain of groups that contains the item that is being yielded.
            if depth > MAX_MODEL_DEPTH:
                raise XMLSchemaModelDepthError(group)
            for item in group:
                if isinstance(item, ModelGroup):
                    current_path.append(item)
                    for _item in safe_iter_path(item, depth + 1):
                        yield _item
                    current_path.pop()
                else:
                    yield item

        # Maps each visited element name to a couple (element, path of its ancestor groups).
        paths = {}
        current_path = [self]
        for e in safe_iter_path(self, 0):
            for pe, previous_path in paths.values():
                if pe.name == e.name and pe.name is not None and pe.type is not e.type:
                    raise XMLSchemaModelError(
                        self, "The model has elements with the same name %r but a different type" % e.name
                    )
                elif not pe.overlap(e):
                    continue
                elif pe is not e and pe.parent is e.parent:
                    if pe.parent.model in {'all', 'choice'}:
                        msg = "{!r} and {!r} overlap and are in the same {!r} group"
                        raise XMLSchemaModelError(self, msg.format(pe, e, pe.parent.model))
                    elif pe.min_occurs == pe.max_occurs:
                        continue

                if not distinguishable_paths(previous_path + [pe], current_path + [e]):
                    raise XMLSchemaModelError(
                        self, "Unique Particle Attribution violation between {!r} and {!r}".format(pe, e)
                    )
            # A copy of the path is stored because current_path is mutated during the visit.
            paths[e.name] = e, current_path[:]
225
def distinguishable_paths(path1, path2):
    """
    Tells if two model paths can be separated in a deterministic way, i.e. without looking
    forward or backtracking. Each argument is a list that goes from the base group of the
    model down to a leaf element. The result is true if there is a deterministic separation
    between the two paths, false if the paths are ambiguous.

    :param path1: the path of groups that leads to the first element (last item).
    :param path2: the path of groups that leads to the second element (last item).
    """
    def has_required(particles):
        # True if at least one of the particles cannot be empty.
        return any(not particle.is_emptiable() for particle in particles)

    e1, e2 = path1[-1], path2[-1]

    # Index of the last item of path1 that precedes the first item not shared with path2.
    depth = 0
    for position, node in enumerate(path1):
        if node not in path2:
            depth = position - 1
            break

    fork = path1[depth]
    if fork.max_occurs == 0:
        return True

    univocal1 = univocal2 = True
    if fork.model == 'sequence':
        idx1 = fork.index(path1[depth + 1])
        idx2 = path2[depth].index(path2[depth + 1])
        before1 = has_required(fork[:idx1])
        after1 = before2 = has_required(fork[idx1 + 1:idx2])
        after2 = has_required(fork[idx2 + 1:])
    else:
        before1 = after1 = before2 = after2 = False

    # Visit the groups below the fork point, each one paired with its child on the path.
    for group, child in zip(path1[depth + 1:-1], path1[depth + 2:]):
        univocal1 &= group.is_univocal()
        if group.model == 'sequence':
            idx = group.index(child)
            before1 |= has_required(group[:idx])
            after1 |= has_required(group[idx + 1:])

    for group, child in zip(path2[depth + 1:-1], path2[depth + 2:]):
        univocal2 &= group.is_univocal()
        if group.model == 'sequence':
            idx = group.index(child)
            before2 |= has_required(group[:idx])
            after2 |= has_required(group[idx + 1:])

    if fork.model != 'sequence':
        return (
            before1 and before2 or
            (before1 and (univocal1 and e1.is_univocal() or after1 or fork.max_occurs == 1)) or
            (before2 and (univocal2 and e2.is_univocal() or after2 or path2[depth].max_occurs == 1))
        )
    elif fork.max_occurs == 1:
        return before2 or (before1 or univocal1) and (e1.is_univocal() or after1)
    else:
        return (
            (before2 or (before1 or univocal1) and (e1.is_univocal() or after1)) and
            (before1 or (before2 or univocal2) and (e2.is_univocal() or after2))
        )
279
class ModelVisitor(MutableSequence):
    """
    A visitor design pattern class for validating XML data against an XSD model group.
    The model is visited using match information provided from outside: occurrences are
    counted and tuples are yielded when the occurrences of an item of the model are
    violated. The visit ends setting the current element to `None`.

    The visitor is also a mutable sequence, used as a stack of the suspended parent
    groups: each entry is a tuple `(group, iterator, items, match)`.

    :param root: the root ModelGroup instance of the model.
    :ivar occurs: the Counter instance for keeping track of occurrences of XSD elements and groups.
    :ivar element: the current XSD element, initialized to the first element of the model.
    :ivar broken: a boolean value that records if the model is still usable.
    :ivar group: the current XSD model group, initialized to *root* argument.
    :ivar iterator: the current XSD group iterator.
    :ivar items: the current XSD group unmatched items.
    :ivar match: if the XSD group has an effective item match.
    """
    def __init__(self, root):
        self.root = root
        self.occurs = Counter()
        self._subgroups = []
        self.element = None
        self.broken = False
        self.group = root
        self.iterator = iter(root)
        self.items = root[::-1]  # reversed copy: the next item to match is the last one
        self.match = False
        self._start()

    def __str__(self):
        # noinspection PyCompatibility,PyUnresolvedReferences
        return unicode(self).encode("utf-8")

    def __unicode__(self):
        return self.__repr__()

    if PY3:
        __str__ = __unicode__

    def __repr__(self):
        return '%s(root=%r)' % (self.__class__.__name__, self.root)

    # Implements the abstract methods of MutableSequence
    def __getitem__(self, i):
        return self._subgroups[i]

    def __setitem__(self, i, item):
        self._subgroups[i] = item

    def __delitem__(self, i):
        del self._subgroups[i]

    def __len__(self):
        return len(self._subgroups)

    def insert(self, i, item):
        self._subgroups.insert(i, item)

    def clear(self):
        """Empties the stack of the suspended groups and resets the visit state to the root group."""
        del self._subgroups[:]
        self.occurs.clear()
        self.element = None
        self.broken = False
        self.group = self.root
        self.iterator = iter(self.root)
        self.items = self.root[::-1]
        self.match = False

    def _start(self):
        """
        Moves to the first non-group item of the model, entering the non-empty groups found
        on the way. Sets the current element to `None` if the current iterator is exhausted.
        """
        while True:
            item = next(self.iterator, None)
            if not isinstance(item, ModelGroup):
                # An element, or None when the iterator has nothing more to give.
                self.element = item
                return
            if item:
                # Suspend the current group and step into the nested one.
                self.append((self.group, self.iterator, self.items, self.match))
                self.group, self.iterator, self.items, self.match = item, iter(item), item[::-1], False

    @property
    def expected(self):
        """
        Returns the expected elements of the current and descendant groups.
        """
        candidates = []
        for item in reversed(self.items):
            if not isinstance(item, ModelGroup):
                candidates.append(item)
                candidates.extend(item.maps.substitution_groups.get(item.name, ()))
            else:
                candidates.extend(item.iter_elements())
        return candidates

    def restart(self):
        """Resets the visitor and positions it again on the first element of the model."""
        self.clear()
        self._start()

    def stop(self):
        """
        Generator function that advances the visitor, without matches, until the current
        element is `None`, yielding the tuples produced by each advance.
        """
        while self.element is not None:
            for error in self.advance():
                yield error

    def advance(self, match=False):
        """
        Generator function for advance to the next element. Yields tuples with
        particles information when occurrence violation is found.

        :param match: provides current element match.
        """
        def stop_item(item):
            """
            Stops element or group matching, incrementing current group counter.

            :return: `True` if the item has violated the minimum occurrences for itself
            or for the current group, `False` otherwise.
            """
            if isinstance(item, ModelGroup):
                # Resume the parent group; an empty stack makes pop() raise IndexError.
                self.group, self.iterator, self.items, self.match = self.pop()

            count = occurs[item]
            group_model = self.group.model
            if count:
                self.match = True
                if group_model == 'choice':
                    occurs[item] = 0
                    occurs[self.group] += 1
                    self.iterator, self.match = iter(self.group), False
                else:
                    if group_model == 'all':
                        self.items.remove(item)
                    else:
                        self.items.pop()
                    if not self.items:
                        occurs[self.group] += 1
                return item.is_missing(count)

            elif group_model == 'sequence':
                if self.match:
                    self.items.pop()
                    if not self.items:
                        occurs[self.group] += 1
                    return not item.is_emptiable()
                elif item.is_emptiable():
                    self.items.pop()
                    return False
                elif self.group.min_occurs <= occurs[self.group] or self:
                    return stop_item(self.group)
                else:
                    self.items.pop()
                    return True

        element, occurs = self.element, self.occurs
        if element is None:
            raise XMLSchemaValueError("cannot advance, %r is ended!" % self)

        if match:
            occurs[element] += 1
            self.match = True
            if not element.is_over(occurs[element]):
                # The current element can still match: don't move.
                return
        try:
            if stop_item(element):
                yield element, occurs[element], [element]

            while True:
                while self.group.is_over(occurs[self.group]):
                    stop_item(self.group)

                particle = next(self.iterator, None)
                if particle is None:
                    if not self.match:
                        if self.group.model == 'all' and all(e.min_occurs == 0 for e in self.items):
                            occurs[self.group] += 1
                        ended_group, unmatched = self.group, self.items
                        if stop_item(ended_group) and unmatched:
                            yield ended_group, occurs[ended_group], self.expected
                    elif not self.items:
                        self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False
                    elif self.group.model == 'all':
                        self.iterator, self.match = iter(self.items), False
                    elif all(e.min_occurs == 0 for e in self.items):
                        self.iterator, self.items, self.match = iter(self.group), self.group[::-1], False
                        occurs[self.group] += 1

                elif not isinstance(particle, ModelGroup):  # XsdElement or XsdAnyElement
                    self.element, occurs[particle] = particle, 0
                    return

                elif particle:
                    self.append((self.group, self.iterator, self.items, self.match))
                    self.group, self.iterator, self.items, self.match = \
                        particle, iter(particle), particle[::-1], False
                    occurs[particle] = 0

        except IndexError:
            # Raised by a pop() on an empty stack or items list: the visit is ended.
            self.element = None
            if self.group.is_missing(occurs[self.group]) and self.items:
                yield self.group, occurs[self.group], self.expected
470