Package ClusterShell :: Module RangeSet
[hide private]
[frames] | no frames]

Source Code for Module ClusterShell.RangeSet

   1  # 
   2  # Copyright (C) 2012-2016 CEA/DAM 
   3  # Copyright (C) 2012-2016 Aurelien Degremont <aurelien.degremont@cea.fr> 
   4  # Copyright (C) 2015-2016 Stephane Thiell <sthiell@stanford.edu> 
   5  # 
   6  # This file is part of ClusterShell. 
   7  # 
   8  # ClusterShell is free software; you can redistribute it and/or 
   9  # modify it under the terms of the GNU Lesser General Public 
  10  # License as published by the Free Software Foundation; either 
  11  # version 2.1 of the License, or (at your option) any later version. 
  12  # 
  13  # ClusterShell is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
  16  # Lesser General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU Lesser General Public 
  19  # License along with ClusterShell; if not, write to the Free Software 
  20  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 
  21   
  22  """ 
  23  Cluster range set module. 
  24   
  25  Instances of RangeSet provide similar operations than the builtin set type, 
  26  extended to support cluster ranges-like format and stepping support ("0-8/2"). 
  27  """ 
  28   
  29  from operator import mul 
  30   
  31  try: 
  32      from itertools import product 
  33  except: 
34 # itertools.product : new in Python 2.6 35 - def product(*args, **kwds):
36 """Cartesian product of input iterables.""" 37 pools = map(tuple, args) * kwds.get('repeat', 1) 38 result = [[]] 39 for pool in pools: 40 result = [x+[y] for x in result for y in pool] 41 for prod in result: 42 yield tuple(prod)
43 44 __all__ = ['RangeSetException', 45 'RangeSetParseError', 46 'RangeSetPaddingError', 47 'RangeSet', 48 'RangeSetND', 49 'AUTOSTEP_DISABLED'] 50 51 # Special constant used to force turn off autostep feature. 52 # Note: +inf is 1E400, but a bug in python 2.4 makes it impossible to be 53 # pickled, so we use less. Later, we could consider sys.maxint here. 54 AUTOSTEP_DISABLED = 1E100
55 56 57 -class RangeSetException(Exception):
58 """Base RangeSet exception class."""
59
60 -class RangeSetParseError(RangeSetException):
61 """Raised when RangeSet parsing cannot be done properly."""
62 - def __init__(self, part, msg):
63 if part: 64 msg = "%s : \"%s\"" % (msg, part) 65 RangeSetException.__init__(self, msg) 66 # faulty subrange; this allows you to target the error 67 self.part = part
68
69 -class RangeSetPaddingError(RangeSetParseError):
70 """Raised when a fatal padding incoherency occurs"""
71 - def __init__(self, part, msg):
72 RangeSetParseError.__init__(self, part, "padding mismatch (%s)" % msg)
73
74 75 -class RangeSet(set):
76 """ 77 Mutable set of cluster node indexes featuring a fast range-based API. 78 79 This class aims to ease the management of potentially large cluster range 80 sets and is used by the :class:`.NodeSet` class. 81 82 RangeSet basic constructors: 83 84 >>> rset = RangeSet() # empty RangeSet 85 >>> rset = RangeSet("5,10-42") # contains 5, 10 to 42 86 >>> rset = RangeSet("0-10/2") # contains 0, 2, 4, 6, 8, 10 87 88 Also any iterable of integers can be specified as first argument: 89 90 >>> RangeSet([3, 6, 8, 7, 1]) 91 1,3,6-8 92 >>> rset2 = RangeSet(rset) 93 94 Padding of ranges (eg. "003-009") can be managed through a public RangeSet 95 instance variable named padding. It may be changed at any time. Padding is 96 a simple display feature per RangeSet object, thus current padding value is 97 not taken into account when computing set operations. 98 RangeSet is itself an iterator over its items as integers (instead of 99 strings). To iterate over string items with optional padding, you can use 100 the :meth:`RangeSet.striter`: method. 101 102 RangeSet provides methods like :meth:`RangeSet.union`, 103 :meth:`RangeSet.intersection`, :meth:`RangeSet.difference`, 104 :meth:`RangeSet.symmetric_difference` and their in-place versions 105 :meth:`RangeSet.update`, :meth:`RangeSet.intersection_update`, 106 :meth:`RangeSet.difference_update`, 107 :meth:`RangeSet.symmetric_difference_update` which conform to the Python 108 Set API. 109 """ 110 _VERSION = 3 # serial version number 111 112 # define __new__() to workaround built-in set subclassing with Python 2.4
113 - def __new__(cls, pattern=None, autostep=None):
114 """Object constructor""" 115 return set.__new__(cls)
116
117 - def __init__(self, pattern=None, autostep=None):
118 """Initialize RangeSet object. 119 120 :param pattern: optional string pattern 121 :param autostep: optional autostep threshold 122 """ 123 if pattern is None or isinstance(pattern, str): 124 set.__init__(self) 125 else: 126 set.__init__(self, pattern) 127 128 if isinstance(pattern, RangeSet): 129 self._autostep = pattern._autostep 130 self.padding = pattern.padding 131 else: 132 self._autostep = None 133 self.padding = None 134 self.autostep = autostep #: autostep threshold public instance attribute 135 136 if isinstance(pattern, str): 137 self._parse(pattern)
138
139 - def _parse(self, pattern):
140 """Parse string of comma-separated x-y/step -like ranges""" 141 # Comma separated ranges 142 if pattern.find(',') < 0: 143 subranges = [pattern] 144 else: 145 subranges = pattern.split(',') 146 147 for subrange in subranges: 148 if subrange.find('/') < 0: 149 step = 1 150 baserange = subrange 151 else: 152 baserange, step = subrange.split('/', 1) 153 154 try: 155 step = int(step) 156 except ValueError: 157 raise RangeSetParseError(subrange, 158 "cannot convert string to integer") 159 160 if baserange.find('-') < 0: 161 if step != 1: 162 raise RangeSetParseError(subrange, 163 "invalid step usage") 164 begin = end = baserange 165 else: 166 begin, end = baserange.split('-', 1) 167 168 # compute padding and return node range info tuple 169 try: 170 pad = 0 171 if int(begin) != 0: 172 begins = begin.lstrip("0") 173 if len(begin) - len(begins) > 0: 174 pad = len(begin) 175 start = int(begins) 176 else: 177 if len(begin) > 1: 178 pad = len(begin) 179 start = 0 180 if int(end) != 0: 181 ends = end.lstrip("0") 182 else: 183 ends = end 184 stop = int(ends) 185 except ValueError: 186 if len(subrange) == 0: 187 msg = "empty range" 188 else: 189 msg = "cannot convert string to integer" 190 raise RangeSetParseError(subrange, msg) 191 192 # check preconditions 193 if stop > 1e100 or start > stop or step < 1: 194 raise RangeSetParseError(subrange, 195 "invalid values in range") 196 197 self.add_range(start, stop + 1, step, pad)
198 199 @classmethod
200 - def fromlist(cls, rnglist, autostep=None):
201 """Class method that returns a new RangeSet with ranges from provided 202 list.""" 203 inst = RangeSet(autostep=autostep) 204 inst.updaten(rnglist) 205 return inst
206 207 @classmethod
208 - def fromone(cls, index, pad=0, autostep=None):
209 """Class method that returns a new RangeSet of one single item or 210 a single range (from integer or slice object).""" 211 inst = RangeSet(autostep=autostep) 212 # support slice object with duck-typing 213 try: 214 inst.add(index, pad) 215 except TypeError: 216 if not index.stop: 217 raise ValueError("Invalid range upper limit (%s)" % index.stop) 218 inst.add_range(index.start or 0, index.stop, index.step or 1, pad) 219 return inst
220
221 - def get_autostep(self):
222 """Get autostep value (property)""" 223 if self._autostep >= AUTOSTEP_DISABLED: 224 return None 225 else: 226 # +1 as user wants node count but it means real steps here 227 return self._autostep + 1
228
229 - def set_autostep(self, val):
230 """Set autostep value (property)""" 231 if val is None: 232 # disabled by default for compat with other cluster tools 233 self._autostep = AUTOSTEP_DISABLED 234 else: 235 # - 1 because user means node count, but we mean real steps 236 # (this operation has no effect on AUTOSTEP_DISABLED value) 237 self._autostep = int(val) - 1
238 239 autostep = property(get_autostep, set_autostep) 240
241 - def dim(self):
242 """Get the number of dimensions of this RangeSet object. Common 243 method with RangeSetND. Here, it will always return 1 unless 244 the object is empty, in that case it will return 0.""" 245 return int(len(self) > 0)
246
247 - def _sorted(self):
248 """Get sorted list from inner set.""" 249 return sorted(set.__iter__(self))
250
251 - def __iter__(self):
252 """Iterate over each element in RangeSet.""" 253 return iter(self._sorted())
254
255 - def striter(self):
256 """Iterate over each (optionally padded) string element in RangeSet.""" 257 pad = self.padding or 0 258 for i in self._sorted(): 259 yield "%0*d" % (pad, i)
260
261 - def contiguous(self):
262 """Object-based iterator over contiguous range sets.""" 263 pad = self.padding or 0 264 for sli in self._contiguous_slices(): 265 yield RangeSet.fromone(slice(sli.start, sli.stop, sli.step), pad)
266
267 - def __reduce__(self):
268 """Return state information for pickling.""" 269 return self.__class__, (str(self),), \ 270 { 'padding': self.padding, \ 271 '_autostep': self._autostep, \ 272 '_version' : RangeSet._VERSION }
273
274 - def __setstate__(self, dic):
275 """called upon unpickling""" 276 self.__dict__.update(dic) 277 if getattr(self, '_version', 0) < RangeSet._VERSION: 278 # unpickle from old version? 279 if getattr(self, '_version', 0) <= 1: 280 # v1 (no object versioning) - CSv1.3 281 setattr(self, '_ranges', [(slice(start, stop + 1, step), pad) \ 282 for start, stop, step, pad in getattr(self, '_ranges')]) 283 elif hasattr(self, '_ranges'): 284 # v2 - CSv1.4-1.5 285 self_ranges = getattr(self, '_ranges') 286 if self_ranges and type(self_ranges[0][0]) is not slice: 287 # workaround for object pickled from Python < 2.5 288 setattr(self, '_ranges', [(slice(start, stop, step), pad) \ 289 for (start, stop, step), pad in self_ranges]) 290 # convert to v3 291 for sli, pad in getattr(self, '_ranges'): 292 self.add_range(sli.start, sli.stop, sli.step, pad) 293 delattr(self, '_ranges') 294 delattr(self, '_length')
295
296 - def _strslices(self):
297 """Stringify slices list (x-y/step format)""" 298 pad = self.padding or 0 299 for sli in self.slices(): 300 if sli.start + 1 == sli.stop: 301 yield "%0*d" % (pad, sli.start) 302 else: 303 assert sli.step >= 0, "Internal error: sli.step < 0" 304 if sli.step == 1: 305 yield "%0*d-%0*d" % (pad, sli.start, pad, sli.stop - 1) 306 else: 307 yield "%0*d-%0*d/%d" % (pad, sli.start, pad, sli.stop - 1, \ 308 sli.step)
309
310 - def __str__(self):
311 """Get comma-separated range-based string (x-y/step format).""" 312 return ','.join(self._strslices())
313 314 # __repr__ is the same as __str__ as it is a valid expression that 315 # could be used to recreate a RangeSet with the same value 316 __repr__ = __str__ 317
318 - def _contiguous_slices(self):
319 """Internal iterator over contiguous slices in RangeSet.""" 320 k = j = None 321 for i in self._sorted(): 322 if k is None: 323 k = j = i 324 if i - j > 1: 325 yield slice(k, j + 1, 1) 326 k = i 327 j = i 328 if k is not None: 329 yield slice(k, j + 1, 1)
330
331 - def _folded_slices(self):
332 """Internal generator that is able to retrieve ranges organized by 333 step.""" 334 if len(self) == 0: 335 return 336 337 prng = None # pending range 338 istart = None # processing starting indice 339 step = 0 # processing step 340 for sli in self._contiguous_slices(): 341 start = sli.start 342 stop = sli.stop 343 unitary = (start + 1 == stop) # one indice? 344 if istart is None: # first loop 345 if unitary: 346 istart = start 347 else: 348 prng = [start, stop, 1] 349 istart = stop - 1 350 i = k = istart 351 elif step == 0: # istart is set but step is unknown 352 if not unitary: 353 if prng is not None: 354 # yield and replace pending range 355 yield slice(*prng) 356 else: 357 yield slice(istart, istart + 1, 1) 358 prng = [start, stop, 1] 359 istart = k = stop - 1 360 continue 361 i = start 362 else: # step > 0 363 assert step > 0 364 i = start 365 # does current range lead to broken step? 366 if step != i - k or not unitary: 367 #Python2.6+: j = i if step == i - k else k 368 if step == i - k: 369 j = i 370 else: 371 j = k 372 # stepped is True when autostep setting does apply 373 stepped = (j - istart >= self._autostep * step) 374 if prng: # yield pending range? 375 if stepped: 376 prng[1] -= 1 377 else: 378 istart += step 379 yield slice(*prng) 380 prng = None 381 if step != i - k: 382 # case: step value has changed 383 if stepped: 384 yield slice(istart, k + 1, step) 385 else: 386 for j in range(istart, k - step + 1, step): 387 yield slice(j, j + 1, 1) 388 if not unitary: 389 yield slice(k, k + 1, 1) 390 if unitary: 391 if stepped: 392 istart = i = k = start 393 else: 394 istart = k 395 else: 396 prng = [start, stop, 1] 397 istart = i = k = stop - 1 398 elif not unitary: 399 # case: broken step by contiguous range 400 if stepped: 401 # yield 'range/step' by taking first indice of new range 402 yield slice(istart, i + 1, step) 403 i += 1 404 else: 405 # autostep setting does not apply in that case 406 for j in range(istart, i - step + 1, step): 407 yield slice(j, j + 1, 1) 408 if stop > i + 1: # current->pending only if not unitary 409 prng = [i, stop, 1] 410 istart = i = k = stop - 1 411 step = i - k 412 k = i 413 # exited loop, process pending range or indice... 414 if step == 0: 415 if prng: 416 yield slice(*prng) 417 else: 418 yield slice(istart, istart + 1, 1) 419 else: 420 assert step > 0 421 stepped = (k - istart >= self._autostep * step) 422 if prng: 423 if stepped: 424 prng[1] -= 1 425 else: 426 istart += step 427 yield slice(*prng) 428 prng = None 429 if stepped: 430 yield slice(istart, i + 1, step) 431 else: 432 for j in range(istart, i + 1, step): 433 yield slice(j, j + 1, 1)
434
435 - def slices(self):
436 """ 437 Iterate over RangeSet ranges as Python slice objects. 438 """ 439 # return an iterator 440 if self._autostep >= AUTOSTEP_DISABLED: 441 # autostep disabled: call simpler method to return only a:b slices 442 return self._contiguous_slices() 443 else: 444 # autostep enabled: call generic method to return a:b:step slices 445 return self._folded_slices()
446
447 - def __getitem__(self, index):
448 """ 449 Return the element at index or a subrange when a slice is specified. 450 """ 451 if isinstance(index, slice): 452 inst = RangeSet() 453 inst._autostep = self._autostep 454 inst.padding = self.padding 455 inst.update(self._sorted()[index]) 456 return inst 457 elif isinstance(index, int): 458 return self._sorted()[index] 459 else: 460 raise TypeError, \ 461 "%s indices must be integers" % self.__class__.__name__
462
463 - def split(self, nbr):
464 """ 465 Split the rangeset into nbr sub-rangesets (at most). Each 466 sub-rangeset will have the same number of elements more or 467 less 1. Current rangeset remains unmodified. Returns an 468 iterator. 469 470 >>> RangeSet("1-5").split(3) 471 RangeSet("1-2") 472 RangeSet("3-4") 473 RangeSet("foo5") 474 """ 475 assert(nbr > 0) 476 477 # We put the same number of element in each sub-nodeset. 478 slice_size = len(self) / nbr 479 left = len(self) % nbr 480 481 begin = 0 482 for i in range(0, min(nbr, len(self))): 483 length = slice_size + int(i < left) 484 yield self[begin:begin + length] 485 begin += length
486
487 - def add_range(self, start, stop, step=1, pad=0):
488 """ 489 Add a range (start, stop, step and padding length) to RangeSet. 490 Like the Python built-in function *range()*, the last element 491 is the largest start + i * step less than stop. 492 """ 493 assert start < stop, "please provide ordered node index ranges" 494 assert step > 0 495 assert pad >= 0 496 assert stop - start < 1e9, "range too large" 497 498 if pad > 0 and self.padding is None: 499 self.padding = pad 500 set.update(self, range(start, stop, step))
501
502 - def copy(self):
503 """Return a shallow copy of a RangeSet.""" 504 cpy = self.__class__() 505 cpy._autostep = self._autostep 506 cpy.padding = self.padding 507 cpy.update(self) 508 return cpy
509 510 __copy__ = copy # For the copy module 511
512 - def __eq__(self, other):
513 """ 514 RangeSet equality comparison. 515 """ 516 # Return NotImplemented instead of raising TypeError, to 517 # indicate that the comparison is not implemented with respect 518 # to the other type (the other comparand then gets a change to 519 # determine the result, then it falls back to object address 520 # comparison). 521 if not isinstance(other, RangeSet): 522 return NotImplemented 523 return len(self) == len(other) and self.issubset(other)
524 525 # Standard set operations: union, intersection, both differences. 526 # Each has an operator version (e.g. __or__, invoked with |) and a 527 # method version (e.g. union). 528 # Subtle: Each pair requires distinct code so that the outcome is 529 # correct when the type of other isn't suitable. For example, if 530 # we did "union = __or__" instead, then Set().union(3) would return 531 # NotImplemented instead of raising TypeError (albeit that *why* it 532 # raises TypeError as-is is also a bit subtle). 533
534 - def _wrap_set_op(self, fun, arg):
535 """Wrap built-in set operations for RangeSet to workaround built-in set 536 base class issues (RangeSet.__new/init__ not called)""" 537 result = fun(self, arg) 538 result._autostep = self._autostep 539 result.padding = self.padding 540 return result
541
542 - def __or__(self, other):
543 """Return the union of two RangeSets as a new RangeSet. 544 545 (I.e. all elements that are in either set.) 546 """ 547 if not isinstance(other, set): 548 return NotImplemented 549 return self.union(other)
550
551 - def union(self, other):
552 """Return the union of two RangeSets as a new RangeSet. 553 554 (I.e. all elements that are in either set.) 555 """ 556 return self._wrap_set_op(set.union, other)
557
558 - def __and__(self, other):
559 """Return the intersection of two RangeSets as a new RangeSet. 560 561 (I.e. all elements that are in both sets.) 562 """ 563 if not isinstance(other, set): 564 return NotImplemented 565 return self.intersection(other)
566
567 - def intersection(self, other):
568 """Return the intersection of two RangeSets as a new RangeSet. 569 570 (I.e. all elements that are in both sets.) 571 """ 572 return self._wrap_set_op(set.intersection, other)
573
574 - def __xor__(self, other):
575 """Return the symmetric difference of two RangeSets as a new RangeSet. 576 577 (I.e. all elements that are in exactly one of the sets.) 578 """ 579 if not isinstance(other, set): 580 return NotImplemented 581 return self.symmetric_difference(other)
582
583 - def symmetric_difference(self, other):
584 """Return the symmetric difference of two RangeSets as a new RangeSet. 585 586 (ie. all elements that are in exactly one of the sets.) 587 """ 588 return self._wrap_set_op(set.symmetric_difference, other)
589
590 - def __sub__(self, other):
591 """Return the difference of two RangeSets as a new RangeSet. 592 593 (I.e. all elements that are in this set and not in the other.) 594 """ 595 if not isinstance(other, set): 596 return NotImplemented 597 return self.difference(other)
598
599 - def difference(self, other):
600 """Return the difference of two RangeSets as a new RangeSet. 601 602 (I.e. all elements that are in this set and not in the other.) 603 """ 604 return self._wrap_set_op(set.difference, other)
605 606 # Membership test 607
608 - def __contains__(self, element):
609 """Report whether an element is a member of a RangeSet. 610 Element can be either another RangeSet object, a string or an 611 integer. 612 613 Called in response to the expression ``element in self``. 614 """ 615 if isinstance(element, set): 616 return element.issubset(self) 617 618 return set.__contains__(self, int(element))
619 620 # Subset and superset test 621
622 - def issubset(self, other):
623 """Report whether another set contains this RangeSet.""" 624 self._binary_sanity_check(other) 625 return set.issubset(self, other)
626
627 - def issuperset(self, other):
628 """Report whether this RangeSet contains another set.""" 629 self._binary_sanity_check(other) 630 return set.issuperset(self, other)
631 632 # Inequality comparisons using the is-subset relation. 633 __le__ = issubset 634 __ge__ = issuperset 635
636 - def __lt__(self, other):
637 self._binary_sanity_check(other) 638 return len(self) < len(other) and self.issubset(other)
639
640 - def __gt__(self, other):
641 self._binary_sanity_check(other) 642 return len(self) > len(other) and self.issuperset(other)
643 644 # Assorted helpers 645
646 - def _binary_sanity_check(self, other):
647 """Check that the other argument to a binary operation is also a set, 648 raising a TypeError otherwise.""" 649 if not isinstance(other, set): 650 raise TypeError, "Binary operation only permitted between sets"
651 652 # In-place union, intersection, differences. 653 # Subtle: The xyz_update() functions deliberately return None, 654 # as do all mutating operations on built-in container types. 655 # The __xyz__ spellings have to return self, though. 656
657 - def __ior__(self, other):
658 """Update a RangeSet with the union of itself and another.""" 659 self._binary_sanity_check(other) 660 set.__ior__(self, other) 661 return self
662
663 - def union_update(self, other):
664 """Update a RangeSet with the union of itself and another.""" 665 self.update(other)
666
667 - def __iand__(self, other):
668 """Update a RangeSet with the intersection of itself and another.""" 669 self._binary_sanity_check(other) 670 set.__iand__(self, other) 671 return self
672
673 - def intersection_update(self, other):
674 """Update a RangeSet with the intersection of itself and another.""" 675 set.intersection_update(self, other)
676
677 - def __ixor__(self, other):
678 """Update a RangeSet with the symmetric difference of itself and 679 another.""" 680 self._binary_sanity_check(other) 681 set.symmetric_difference_update(self, other) 682 return self
683
684 - def symmetric_difference_update(self, other):
685 """Update a RangeSet with the symmetric difference of itself and 686 another.""" 687 set.symmetric_difference_update(self, other)
688
689 - def __isub__(self, other):
690 """Remove all elements of another set from this RangeSet.""" 691 self._binary_sanity_check(other) 692 set.difference_update(self, other) 693 return self
694
695 - def difference_update(self, other, strict=False):
696 """Remove all elements of another set from this RangeSet. 697 698 If strict is True, raise KeyError if an element cannot be removed. 699 (strict is a RangeSet addition)""" 700 if strict and other not in self: 701 raise KeyError(other.difference(self)[0]) 702 set.difference_update(self, other)
703 704 # Python dict-like mass mutations: update, clear 705
706 - def update(self, iterable):
707 """Add all integers from an iterable (such as a list).""" 708 if isinstance(iterable, RangeSet): 709 # keep padding unless it has not been defined yet 710 if self.padding is None and iterable.padding is not None: 711 self.padding = iterable.padding 712 assert type(iterable) is not str 713 set.update(self, iterable)
714
715 - def updaten(self, rangesets):
716 """ 717 Update a rangeset with the union of itself and several others. 718 """ 719 for rng in rangesets: 720 if isinstance(rng, set): 721 self.update(rng) 722 else: 723 self.update(RangeSet(rng))
724 # py2.5+ 725 #self.update(rng if isinstance(rng, set) else RangeSet(rng)) 726
727 - def clear(self):
728 """Remove all elements from this RangeSet.""" 729 set.clear(self) 730 self.padding = None
731 732 # Single-element mutations: add, remove, discard 733
734 - def add(self, element, pad=0):
735 """Add an element to a RangeSet. 736 This has no effect if the element is already present. 737 """ 738 set.add(self, int(element)) 739 if pad > 0 and self.padding is None: 740 self.padding = pad
741
742 - def remove(self, element):
743 """Remove an element from a RangeSet; it must be a member. 744 745 :param element: the element to remove 746 :raises KeyError: element is not contained in RangeSet 747 :raises ValueError: element is not castable to integer 748 """ 749 set.remove(self, int(element))
750
751 - def discard(self, element):
752 """Remove element from the RangeSet if it is a member. 753 754 If the element is not a member, do nothing. 755 """ 756 try: 757 i = int(element) 758 set.discard(self, i) 759 except ValueError: 760 pass # ignore other object types
761
762 763 -class RangeSetND(object):
764 """ 765 Build a N-dimensional RangeSet object. 766 767 .. warning:: You don't usually need to use this class directly, use 768 :class:`.NodeSet` instead that has ND support. 769 770 Empty constructor:: 771 772 RangeSetND() 773 774 Build from a list of list of :class:`RangeSet` objects:: 775 776 RangeSetND([[rs1, rs2, rs3, ...], ...]) 777 778 Strings are also supported:: 779 780 RangeSetND([["0-3", "4-10", ...], ...]) 781 782 Integers are also supported:: 783 784 RangeSetND([(0, 4), (0, 5), (1, 4), (1, 5), ...] 785 """
786 - def __init__(self, args=None, pads=None, autostep=None, copy_rangeset=True):
787 """RangeSetND initializer 788 789 All parameters are optional. 790 791 :param args: generic "list of list" input argument (default is None) 792 :param pads: list of 0-padding length (default is to not pad any 793 dimensions) 794 :param autostep: autostep threshold (use range/step notation if more 795 than #autostep items meet the condition) - default is 796 off (None) 797 :param copy_rangeset: (advanced) if set to False, do not copy RangeSet 798 objects from args (transfer ownership), which is 799 faster. In that case, you should not modify these 800 objects afterwards (default is True). 801 """ 802 # RangeSetND are arranged as a list of N-dimensional RangeSet vectors 803 self._veclist = [] 804 # Dirty flag to avoid doing veclist folding too often 805 self._dirty = True 806 # Initialize autostep through property 807 self._autostep = None 808 self.autostep = autostep #: autostep threshold public instance attribute 809 # Hint on whether several dimensions are varying or not 810 self._multivar_hint = False 811 if args is None: 812 return 813 for rgvec in args: 814 if rgvec: 815 if type(rgvec[0]) is str: 816 self._veclist.append([RangeSet(rg, autostep=autostep) \ 817 for rg in rgvec]) 818 elif isinstance(rgvec[0], RangeSet): 819 if copy_rangeset: 820 self._veclist.append([rg.copy() for rg in rgvec]) 821 else: 822 self._veclist.append(rgvec) 823 else: 824 if pads is None: 825 self._veclist.append( \ 826 [RangeSet.fromone(rg, autostep=autostep) \ 827 for rg in rgvec]) 828 else: 829 self._veclist.append( \ 830 [RangeSet.fromone(rg, pad, autostep) \ 831 for rg, pad in zip(rgvec, pads)])
832
833 - class precond_fold(object):
834 """Decorator to ease internal folding management"""
835 - def __call__(self, func):
836 def inner(*args, **kwargs): 837 rgnd, fargs = args[0], args[1:] 838 if rgnd._dirty: 839 rgnd._fold() 840 return func(rgnd, *fargs, **kwargs)
841 # modify the decorator meta-data for pydoc 842 # Note: should be later replaced by @wraps (functools) 843 # as of Python 2.5 844 inner.__name__ = func.__name__ 845 inner.__doc__ = func.__doc__ 846 inner.__dict__ = func.__dict__ 847 inner.__module__ = func.__module__ 848 return inner
849 850 @precond_fold()
851 - def copy(self):
852 """Return a new, mutable shallow copy of a RangeSetND.""" 853 cpy = self.__class__() 854 # Shallow "to the extent possible" says the copy module, so here that 855 # means calling copy() on each sub-RangeSet to keep mutability. 856 cpy._veclist = [[rg.copy() for rg in rgvec] for rgvec in self._veclist] 857 cpy._dirty = self._dirty 858 return cpy
859 860 __copy__ = copy # For the copy module 861
862 - def __eq__(self, other):
863 """RangeSetND equality comparison.""" 864 # Return NotImplemented instead of raising TypeError, to 865 # indicate that the comparison is not implemented with respect 866 # to the other type (the other comparand then gets a change to 867 # determine the result, then it falls back to object address 868 # comparison). 869 if not isinstance(other, RangeSetND): 870 return NotImplemented 871 return len(self) == len(other) and self.issubset(other)
872
873 - def __nonzero__(self):
874 return bool(self._veclist)
875
876 - def __len__(self):
877 """Count unique elements in N-dimensional rangeset.""" 878 return sum([reduce(mul, [len(rg) for rg in rgvec]) \ 879 for rgvec in self.veclist])
880 881 @precond_fold()
882 - def __str__(self):
883 """String representation of N-dimensional RangeSet.""" 884 result = "" 885 for rgvec in self._veclist: 886 result += "; ".join([str(rg) for rg in rgvec]) 887 result += "\n" 888 return result
889 890 @precond_fold()
891 - def __iter__(self):
892 return self._iter()
893
894 - def _iter(self):
895 """Iterate through individual items as tuples.""" 896 for vec in self._veclist: 897 for ivec in product(*vec): 898 yield ivec
899 900 @precond_fold()
901 - def iter_padding(self):
902 """Iterate through individual items as tuples with padding info.""" 903 for vec in self._veclist: 904 for ivec in product(*vec): 905 yield ivec, [rg.padding for rg in vec]
906 907 @precond_fold()
908 - def _get_veclist(self):
909 """Get folded veclist""" 910 return self._veclist
911
912 - def _set_veclist(self, val):
913 """Set veclist and set dirty flag for deferred folding.""" 914 self._veclist = val 915 self._dirty = True
916 917 veclist = property(_get_veclist, _set_veclist) 918
919 - def vectors(self):
920 """Get underlying :class:`RangeSet` vectors""" 921 return iter(self.veclist)
922
923 - def dim(self):
924 """Get the current number of dimensions of this RangeSetND 925 object. Return 0 when object is empty.""" 926 try: 927 return len(self._veclist[0]) 928 except IndexError: 929 return 0
930
931 - def pads(self):
932 """Get a tuple of padding length info for each dimension.""" 933 # return a tuple of max padding length for each axis 934 pad_veclist = ((rg.padding for rg in vec) for vec in self._veclist) 935 return tuple(max(pads) for pads in zip(*pad_veclist))
936
937 - def get_autostep(self):
938 """Get autostep value (property)""" 939 if self._autostep >= AUTOSTEP_DISABLED: 940 return None 941 else: 942 # +1 as user wants node count but _autostep means real steps here 943 return self._autostep + 1
944
945 - def set_autostep(self, val):
946 """Set autostep value (property)""" 947 # Must conform to RangeSet.autostep logic 948 if val is None: 949 self._autostep = AUTOSTEP_DISABLED 950 else: 951 # Like in RangeSet.set_autostep(): -1 because user means node count, 952 # but we mean real steps (this operation has no effect on 953 # AUTOSTEP_DISABLED value) 954 self._autostep = int(val) - 1 955 956 # Update our RangeSet objects 957 for rgvec in self._veclist: 958 for rg in rgvec: 959 rg._autostep = self._autostep
960 961 autostep = property(get_autostep, set_autostep) 962 963 @precond_fold()
964 - def __getitem__(self, index):
965 """ 966 Return the element at index or a subrange when a slice is specified. 967 """ 968 if isinstance(index, slice): 969 iveclist = [] 970 for rgvec in self._veclist: 971 iveclist += product(*rgvec) 972 assert(len(iveclist) == len(self)) 973 rnd = RangeSetND(iveclist[index], 974 pads=[rg.padding for rg in self._veclist[0]], 975 autostep=self.autostep) 976 return rnd 977 978 elif isinstance(index, int): 979 # find a tuple of integer (multi-dimensional) at position index 980 if index < 0: 981 length = len(self) 982 if index >= -length: 983 index = length + index 984 else: 985 raise IndexError, "%d out of range" % index 986 length = 0 987 for rgvec in self._veclist: 988 cnt = reduce(mul, [len(rg) for rg in rgvec]) 989 if length + cnt < index: 990 length += cnt 991 else: 992 for ivec in product(*rgvec): 993 if index == length: 994 return ivec 995 length += 1 996 raise IndexError, "%d out of range" % index 997 else: 998 raise TypeError, \ 999 "%s indices must be integers" % self.__class__.__name__
1000 1001 @precond_fold()
1002 - def contiguous(self):
1003 """Object-based iterator over contiguous range sets.""" 1004 veclist = self._veclist 1005 try: 1006 dim = len(veclist[0]) 1007 except IndexError: 1008 return 1009 for dimidx in range(dim): 1010 new_veclist = [] 1011 for rgvec in veclist: 1012 for rgsli in rgvec[dimidx].contiguous(): 1013 rgvec = list(rgvec) 1014 rgvec[dimidx] = rgsli 1015 new_veclist.append(rgvec) 1016 veclist = new_veclist 1017 for rgvec in veclist: 1018 yield RangeSetND([rgvec])
1019 1020 # Membership test 1021 1022 @precond_fold()
1023 - def __contains__(self, element):
1024 """Report whether an element is a member of a RangeSetND. 1025 Element can be either another RangeSetND object, a string or 1026 an integer. 1027 1028 Called in response to the expression ``element in self``. 1029 """ 1030 if isinstance(element, RangeSetND): 1031 rgnd_element = element 1032 else: 1033 rgnd_element = RangeSetND([[str(element)]]) 1034 return rgnd_element.issubset(self)
1035 1036 # Subset and superset test 1037
1038 - def issubset(self, other):
1039 """Report whether another set contains this RangeSetND.""" 1040 self._binary_sanity_check(other) 1041 return other.issuperset(self)
1042 1043 @precond_fold()
1044 - def issuperset(self, other):
1045 """Report whether this RangeSetND contains another RangeSetND.""" 1046 self._binary_sanity_check(other) 1047 if self.dim() == 1 and other.dim() == 1: 1048 return self._veclist[0][0].issuperset(other._veclist[0][0]) 1049 if not other._veclist: 1050 return True 1051 test = other.copy() 1052 test.difference_update(self) 1053 return not bool(test)
1054 1055 # Inequality comparisons using the is-subset relation. 1056 __le__ = issubset 1057 __ge__ = issuperset 1058
1059 - def __lt__(self, other):
1060 self._binary_sanity_check(other) 1061 return len(self) < len(other) and self.issubset(other)
1062
1063 - def __gt__(self, other):
1064 self._binary_sanity_check(other) 1065 return len(self) > len(other) and self.issuperset(other)
1066 1067 # Assorted helpers 1068
1069 - def _binary_sanity_check(self, other):
1070 """Check that the other argument to a binary operation is also a 1071 RangeSetND, raising a TypeError otherwise.""" 1072 if not isinstance(other, RangeSetND): 1073 raise TypeError, \ 1074 "Binary operation only permitted between RangeSetND"
1075
1076 - def _sort(self):
1077 """N-dimensional sorting.""" 1078 def rgveckeyfunc(rgvec): 1079 # key used for sorting purposes, based on the following 1080 # conditions: 1081 # (1) larger vector first (#elements) 1082 # (2) larger dim first (#elements) 1083 # (3) lower first index first 1084 # (4) lower last index first 1085 return (-reduce(mul, [len(rg) for rg in rgvec]), \ 1086 tuple((-len(rg), rg[0], rg[-1]) for rg in rgvec))
1087 self._veclist.sort(key=rgveckeyfunc) 1088 1089 @precond_fold()
1090 - def fold(self):
1091 """Explicit folding call. Please note that folding of RangeSetND 1092 nD vectors are automatically managed, so you should not have to 1093 call this method. It may be still useful in some extreme cases 1094 where the RangeSetND is heavily modified.""" 1095 pass
1096
1097 - def _fold(self):
1098 """In-place N-dimensional folding.""" 1099 assert self._dirty 1100 if len(self._veclist) > 1: 1101 self._fold_univariate() or self._fold_multivariate() 1102 else: 1103 self._dirty = False
1104
1105 - def _fold_univariate(self):
1106 """Univariate nD folding. Return True on success and False when 1107 a multivariate folding is required.""" 1108 dim = self.dim() 1109 vardim = dimdiff = 0 1110 if dim > 1: 1111 # We got more than one dimension, see if only one is changing... 1112 for i in range(dim): 1113 # Are all rangesets on this dimension the same? 1114 slist = [vec[i] for vec in self._veclist] 1115 if slist.count(slist[0]) != len(slist): 1116 dimdiff += 1 1117 if dimdiff > 1: 1118 break 1119 vardim = i 1120 univar = (dim == 1 or dimdiff == 1) 1121 if univar: 1122 # Eligible for univariate folding (faster!) 1123 for vec in self._veclist[1:]: 1124 self._veclist[0][vardim].update(vec[vardim]) 1125 del self._veclist[1:] 1126 self._dirty = False 1127 self._multivar_hint = not univar 1128 return univar
1129
1130 - def _fold_multivariate(self):
1131 """Multivariate nD folding""" 1132 # PHASE 1: expand with respect to uniqueness 1133 self._fold_multivariate_expand() 1134 self._sort() 1135 # PHASE 2: merge 1136 self._fold_multivariate_merge() 1137 self._sort() 1138 self._dirty = False
1139
1140 - def _fold_multivariate_expand(self):
1141 """Multivariate nD folding: expand [phase 1]""" 1142 max_length = sum([reduce(mul, [len(rg) for rg in rgvec]) \ 1143 for rgvec in self._veclist]) 1144 # Simple heuristic that makes us faster 1145 if len(self._veclist) * (len(self._veclist) - 1) / 2 > max_length * 10: 1146 # *** nD full expand is preferred *** 1147 pads = self.pads() 1148 self._veclist = [[RangeSet.fromone(i, pad=pads[axis]) 1149 for axis, i in enumerate(tvec)] 1150 for tvec in set(self._iter())] 1151 return 1152 1153 # *** nD compare algorithm is preferred *** 1154 index1, index2 = 0, 1 1155 while (index1 + 1) < len(self._veclist): 1156 # use 2 references on iterator to compare items by couples 1157 item1 = self._veclist[index1] 1158 index2 = index1 + 1 1159 index1 += 1 1160 while index2 < len(self._veclist): 1161 item2 = self._veclist[index2] 1162 index2 += 1 1163 new_item = None 1164 disjoint = False 1165 suppl = [] 1166 for pos, (rg1, rg2) in enumerate(zip(item1, item2)): 1167 if not rg1 & rg2: 1168 disjoint = True 1169 break 1170 1171 if new_item is None: 1172 new_item = [None] * len(item1) 1173 1174 if rg1 == rg2: 1175 new_item[pos] = rg1 1176 else: 1177 assert rg1 & rg2 1178 # intersection 1179 new_item[pos] = rg1 & rg2 1180 # create part 1 1181 if rg1 - rg2: 1182 item1_p = item1[0:pos] + [rg1 - rg2] + item1[pos+1:] 1183 suppl.append(item1_p) 1184 # create part 2 1185 if rg2 - rg1: 1186 item2_p = item2[0:pos] + [rg2 - rg1] + item2[pos+1:] 1187 suppl.append(item2_p) 1188 if not disjoint: 1189 assert new_item is not None 1190 assert suppl is not None 1191 item1 = self._veclist[index1 - 1] = new_item 1192 index2 -= 1 1193 self._veclist.pop(index2) 1194 self._veclist += suppl
1195
1196 - def _fold_multivariate_merge(self):
1197 """Multivariate nD folding: merge [phase 2]""" 1198 chg = True 1199 while chg: 1200 chg = False 1201 index1, index2 = 0, 1 1202 while (index1 + 1) < len(self._veclist): 1203 # use 2 references on iterator to compare items by couples 1204 item1 = self._veclist[index1] 1205 index2 = index1 + 1 1206 index1 += 1 1207 while index2 < len(self._veclist): 1208 item2 = self._veclist[index2] 1209 index2 += 1 1210 new_item = [None] * len(item1) 1211 nb_diff = 0 1212 # compare 2 rangeset vector, item by item, the idea being 1213 # to merge vectors if they differ only by one item 1214 for pos, (rg1, rg2) in enumerate(zip(item1, item2)): 1215 if rg1 == rg2: 1216 new_item[pos] = rg1 1217 elif not rg1 & rg2: # merge on disjoint ranges 1218 nb_diff += 1 1219 if nb_diff > 1: 1220 break 1221 new_item[pos] = rg1 | rg2 1222 # if fully contained, keep the largest one 1223 elif (rg1 > rg2 or rg1 < rg2): # and nb_diff == 0: 1224 nb_diff += 1 1225 if nb_diff > 1: 1226 break 1227 new_item[pos] = max(rg1, rg2) 1228 # otherwise, compute rangeset intersection and 1229 # keep the two disjoint part to be handled 1230 # later... 1231 else: 1232 # intersection but do nothing 1233 nb_diff = 2 1234 break 1235 # one change has been done: use this new item to compare 1236 # with other 1237 if nb_diff <= 1: 1238 chg = True 1239 item1 = self._veclist[index1 - 1] = new_item 1240 index2 -= 1 1241 self._veclist.pop(index2)
1242
1243 - def __or__(self, other):
1244 """Return the union of two RangeSetNDs as a new RangeSetND. 1245 1246 (I.e. all elements that are in either set.) 1247 """ 1248 if not isinstance(other, RangeSetND): 1249 return NotImplemented 1250 return self.union(other)
1251
1252 - def union(self, other):
1253 """Return the union of two RangeSetNDs as a new RangeSetND. 1254 1255 (I.e. all elements that are in either set.) 1256 """ 1257 rgnd_copy = self.copy() 1258 rgnd_copy.update(other) 1259 return rgnd_copy
1260
1261 - def update(self, other):
1262 """Add all RangeSetND elements to this RangeSetND.""" 1263 if isinstance(other, RangeSetND): 1264 iterable = other._veclist 1265 else: 1266 iterable = other 1267 for vec in iterable: 1268 # copy rangesets and set custom autostep 1269 assert isinstance(vec[0], RangeSet) 1270 cpyvec = [] 1271 for rg in vec: 1272 cpyrg = rg.copy() 1273 cpyrg.autostep = self.autostep 1274 cpyvec.append(cpyrg) 1275 self._veclist.append(cpyvec) 1276 self._dirty = True 1277 if not self._multivar_hint: 1278 self._fold_univariate()
1279 1280 union_update = update 1281
1282 - def __ior__(self, other):
1283 """Update a RangeSetND with the union of itself and another.""" 1284 self._binary_sanity_check(other) 1285 self.update(other) 1286 return self
1287
1288 - def __isub__(self, other):
1289 """Remove all elements of another set from this RangeSetND.""" 1290 self._binary_sanity_check(other) 1291 self.difference_update(other) 1292 return self
1293
1294 - def difference_update(self, other, strict=False):
1295 """Remove all elements of another set from this RangeSetND. 1296 1297 If strict is True, raise KeyError if an element cannot be removed 1298 (strict is a RangeSet addition)""" 1299 if strict and not other in self: 1300 raise KeyError(other.difference(self)[0]) 1301 1302 ergvx = other._veclist # read only 1303 rgnd_new = [] 1304 index1 = 0 1305 while index1 < len(self._veclist): 1306 rgvec1 = self._veclist[index1] 1307 procvx1 = [ rgvec1 ] 1308 nextvx1 = [] 1309 index2 = 0 1310 while index2 < len(ergvx): 1311 rgvec2 = ergvx[index2] 1312 while len(procvx1) > 0: # refine diff for each resulting vector 1313 rgproc1 = procvx1.pop(0) 1314 tmpvx = [] 1315 for pos, (rg1, rg2) in enumerate(zip(rgproc1, rgvec2)): 1316 if rg1 == rg2 or rg1 < rg2: # issubset 1317 pass 1318 elif rg1 & rg2: # intersect 1319 tmpvec = list(rgproc1) 1320 tmpvec[pos] = rg1.difference(rg2) 1321 tmpvx.append(tmpvec) 1322 else: # disjoint 1323 tmpvx = [ rgproc1 ] # reset previous work 1324 break 1325 if tmpvx: 1326 nextvx1 += tmpvx 1327 if nextvx1: 1328 procvx1 = nextvx1 1329 nextvx1 = [] 1330 index2 += 1 1331 if procvx1: 1332 rgnd_new += procvx1 1333 index1 += 1 1334 self.veclist = rgnd_new
1335
1336 - def __sub__(self, other):
1337 """Return the difference of two RangeSetNDs as a new RangeSetND. 1338 1339 (I.e. all elements that are in this set and not in the other.) 1340 """ 1341 if not isinstance(other, RangeSetND): 1342 return NotImplemented 1343 return self.difference(other)
1344
1345 - def difference(self, other):
1346 """ 1347 ``s.difference(t)`` returns a new object with elements in s 1348 but not in t. 1349 """ 1350 self_copy = self.copy() 1351 self_copy.difference_update(other) 1352 return self_copy
1353
1354 - def intersection(self, other):
1355 """ 1356 ``s.intersection(t)`` returns a new object with elements common 1357 to s and t. 1358 """ 1359 self_copy = self.copy() 1360 self_copy.intersection_update(other) 1361 return self_copy
1362
1363 - def __and__(self, other):
1364 """ 1365 Implements the & operator. So ``s & t`` returns a new object 1366 with elements common to s and t. 1367 """ 1368 if not isinstance(other, RangeSetND): 1369 return NotImplemented 1370 return self.intersection(other)
1371
1372 - def intersection_update(self, other):
1373 """ 1374 ``s.intersection_update(t)`` returns nodeset s keeping only 1375 elements also found in t. 1376 """ 1377 if other is self: 1378 return 1379 1380 tmp_rnd = RangeSetND() 1381 1382 empty_rset = RangeSet() 1383 1384 for rgvec in self._veclist: 1385 for ergvec in other._veclist: 1386 irgvec = [rg.intersection(erg) \ 1387 for rg, erg in zip(rgvec, ergvec)] 1388 if not empty_rset in irgvec: 1389 tmp_rnd.update([irgvec]) 1390 # substitute 1391 self.veclist = tmp_rnd.veclist
1392
1393 - def __iand__(self, other):
1394 """ 1395 Implements the &= operator. So ``s &= t`` returns object s 1396 keeping only elements also found in t (Python 2.5+ required). 1397 """ 1398 self._binary_sanity_check(other) 1399 self.intersection_update(other) 1400 return self
1401
1402 - def symmetric_difference(self, other):
1403 """ 1404 ``s.symmetric_difference(t)`` returns the symmetric difference 1405 of two objects as a new RangeSetND. 1406 1407 (ie. all items that are in exactly one of the RangeSetND.) 1408 """ 1409 self_copy = self.copy() 1410 self_copy.symmetric_difference_update(other) 1411 return self_copy
1412
1413 - def __xor__(self, other):
1414 """ 1415 Implement the ^ operator. So ``s ^ t`` returns a new RangeSetND 1416 with nodes that are in exactly one of the RangeSetND. 1417 """ 1418 if not isinstance(other, RangeSetND): 1419 return NotImplemented 1420 return self.symmetric_difference(other)
1421
1422 - def symmetric_difference_update(self, other):
1423 """ 1424 ``s.symmetric_difference_update(t)`` returns RangeSetND s 1425 keeping all nodes that are in exactly one of the objects. 1426 """ 1427 diff2 = other.difference(self) 1428 self.difference_update(other) 1429 self.update(diff2)
1430
1431 - def __ixor__(self, other):
1432 """ 1433 Implement the ^= operator. So ``s ^= t`` returns object s after 1434 keeping all items that are in exactly one of the RangeSetND 1435 (Python 2.5+ required). 1436 """ 1437 self._binary_sanity_check(other) 1438 self.symmetric_difference_update(other) 1439 return self
1440