# HG changeset patch # User Yuya Nishihara # Date 1476606531 -32400 # Node ID 1be65deb3d541f71ea401edfcc8f5a47dd8eac89 # Parent b6c051cd1231910b0981edb01b173cd53a3338d0 smartset: move set classes and related functions from revset module (API) These classes are pretty large and independent from revset computation. 2961 mercurial/revset.py 973 mercurial/smartset.py 3934 total revset.prettyformatset() is renamed to smartset.prettyformat(). Smartset classes are aliased since they are quite common in revset.py. diff -r b6c051cd1231 -r 1be65deb3d54 mercurial/commands.py --- a/mercurial/commands.py Wed Jan 25 22:39:17 2017 +0900 +++ b/mercurial/commands.py Sun Oct 16 17:28:51 2016 +0900 @@ -59,6 +59,7 @@ revset, scmutil, server, + smartset, sshserver, sslutil, streamclone, @@ -2804,8 +2805,8 @@ arevs = revset.makematcher(treebystage['analyzed'])(repo) brevs = revset.makematcher(treebystage['optimized'])(repo) if ui.verbose: - ui.note(("* analyzed set:\n"), revset.prettyformatset(arevs), "\n") - ui.note(("* optimized set:\n"), revset.prettyformatset(brevs), "\n") + ui.note(("* analyzed set:\n"), smartset.prettyformat(arevs), "\n") + ui.note(("* optimized set:\n"), smartset.prettyformat(brevs), "\n") arevs = list(arevs) brevs = list(brevs) if arevs == brevs: @@ -2828,7 +2829,7 @@ func = revset.makematcher(tree) revs = func(repo) if ui.verbose: - ui.note(("* set:\n"), revset.prettyformatset(revs), "\n") + ui.note(("* set:\n"), smartset.prettyformat(revs), "\n") for c in revs: ui.write("%s\n" % c) diff -r b6c051cd1231 -r 1be65deb3d54 mercurial/revset.py --- a/mercurial/revset.py Wed Jan 25 22:39:17 2017 +0900 +++ b/mercurial/revset.py Sun Oct 16 17:28:51 2016 +0900 @@ -26,9 +26,15 @@ pycompat, registrar, repoview, + smartset, util, ) +baseset = smartset.baseset +generatorset = smartset.generatorset +spanset = smartset.spanset +fullreposet = smartset.fullreposet + def _revancestors(repo, revs, followfirst): """Like revlog.ancestors(), but supports followfirst.""" if followfirst: @@ -2940,967 +2946,6 @@ funcs.add(tree[1][1]) return funcs -def _formatsetrepr(r): - """Format an optional printable representation of a set - - ======== ================================= - type(r) example - ======== ================================= - tuple ('', other) - str '' - callable lambda: '' % sorted(b) - object other - ======== ================================= - """ - if r is None: - return '' - elif isinstance(r, tuple): - return r[0] % r[1:] - elif isinstance(r, str): - return r - elif callable(r): - return r() - else: - return repr(r) - -class abstractsmartset(object): - - def __nonzero__(self): - """True if the smartset is not empty""" - raise NotImplementedError() - - def __contains__(self, rev): - """provide fast membership testing""" - raise NotImplementedError() - - def __iter__(self): - """iterate the set in the order it is supposed to be iterated""" - raise NotImplementedError() - - # Attributes containing a function to perform a fast iteration in a given - # direction. A smartset can have none, one, or both defined. - # - # Default value is None instead of a function returning None to avoid - # initializing an iterator just for testing if a fast method exists. - fastasc = None - fastdesc = None - - def isascending(self): - """True if the set will iterate in ascending order""" - raise NotImplementedError() - - def isdescending(self): - """True if the set will iterate in descending order""" - raise NotImplementedError() - - def istopo(self): - """True if the set will iterate in topographical order""" - raise NotImplementedError() - - def min(self): - """return the minimum element in the set""" - if self.fastasc is None: - v = min(self) - else: - for v in self.fastasc(): - break - else: - raise ValueError('arg is an empty sequence') - self.min = lambda: v - return v - - def max(self): - """return the maximum element in the set""" - if self.fastdesc is None: - return max(self) - else: - for v in self.fastdesc(): - break - else: - raise ValueError('arg is an empty sequence') - self.max = lambda: v - return v - - def first(self): - """return the first element in the set (user iteration perspective) - - Return None if the set is empty""" - raise NotImplementedError() - - def last(self): - """return the last element in the set (user iteration perspective) - - Return None if the set is empty""" - raise NotImplementedError() - - def __len__(self): - """return the length of the smartsets - - This can be expensive on smartset that could be lazy otherwise.""" - raise NotImplementedError() - - def reverse(self): - """reverse the expected iteration order""" - raise NotImplementedError() - - def sort(self, reverse=True): - """get the set to iterate in an ascending or descending order""" - raise NotImplementedError() - - def __and__(self, other): - """Returns a new object with the intersection of the two collections. - - This is part of the mandatory API for smartset.""" - if isinstance(other, fullreposet): - return self - return self.filter(other.__contains__, condrepr=other, cache=False) - - def __add__(self, other): - """Returns a new object with the union of the two collections. - - This is part of the mandatory API for smartset.""" - return addset(self, other) - - def __sub__(self, other): - """Returns a new object with the substraction of the two collections. - - This is part of the mandatory API for smartset.""" - c = other.__contains__ - return self.filter(lambda r: not c(r), condrepr=('', other), - cache=False) - - def filter(self, condition, condrepr=None, cache=True): - """Returns this smartset filtered by condition as a new smartset. - - `condition` is a callable which takes a revision number and returns a - boolean. Optional `condrepr` provides a printable representation of - the given `condition`. - - This is part of the mandatory API for smartset.""" - # builtin cannot be cached. but do not needs to - if cache and util.safehasattr(condition, 'func_code'): - condition = util.cachefunc(condition) - return filteredset(self, condition, condrepr) - -class baseset(abstractsmartset): - """Basic data structure that represents a revset and contains the basic - operation that it should be able to perform. - - Every method in this class should be implemented by any smartset class. - """ - def __init__(self, data=(), datarepr=None, istopo=False): - """ - datarepr: a tuple of (format, obj, ...), a function or an object that - provides a printable representation of the given data. - """ - self._ascending = None - self._istopo = istopo - if not isinstance(data, list): - if isinstance(data, set): - self._set = data - # set has no order we pick one for stability purpose - self._ascending = True - data = list(data) - self._list = data - self._datarepr = datarepr - - @util.propertycache - def _set(self): - return set(self._list) - - @util.propertycache - def _asclist(self): - asclist = self._list[:] - asclist.sort() - return asclist - - def __iter__(self): - if self._ascending is None: - return iter(self._list) - elif self._ascending: - return iter(self._asclist) - else: - return reversed(self._asclist) - - def fastasc(self): - return iter(self._asclist) - - def fastdesc(self): - return reversed(self._asclist) - - @util.propertycache - def __contains__(self): - return self._set.__contains__ - - def __nonzero__(self): - return bool(self._list) - - def sort(self, reverse=False): - self._ascending = not bool(reverse) - self._istopo = False - - def reverse(self): - if self._ascending is None: - self._list.reverse() - else: - self._ascending = not self._ascending - self._istopo = False - - def __len__(self): - return len(self._list) - - def isascending(self): - """Returns True if the collection is ascending order, False if not. - - This is part of the mandatory API for smartset.""" - if len(self) <= 1: - return True - return self._ascending is not None and self._ascending - - def isdescending(self): - """Returns True if the collection is descending order, False if not. - - This is part of the mandatory API for smartset.""" - if len(self) <= 1: - return True - return self._ascending is not None and not self._ascending - - def istopo(self): - """Is the collection is in topographical order or not. - - This is part of the mandatory API for smartset.""" - if len(self) <= 1: - return True - return self._istopo - - def first(self): - if self: - if self._ascending is None: - return self._list[0] - elif self._ascending: - return self._asclist[0] - else: - return self._asclist[-1] - return None - - def last(self): - if self: - if self._ascending is None: - return self._list[-1] - elif self._ascending: - return self._asclist[-1] - else: - return self._asclist[0] - return None - - def __repr__(self): - d = {None: '', False: '-', True: '+'}[self._ascending] - s = _formatsetrepr(self._datarepr) - if not s: - l = self._list - # if _list has been built from a set, it might have a different - # order from one python implementation to another. - # We fallback to the sorted version for a stable output. - if self._ascending is not None: - l = self._asclist - s = repr(l) - return '<%s%s %s>' % (type(self).__name__, d, s) - -class filteredset(abstractsmartset): - """Duck type for baseset class which iterates lazily over the revisions in - the subset and contains a function which tests for membership in the - revset - """ - def __init__(self, subset, condition=lambda x: True, condrepr=None): - """ - condition: a function that decide whether a revision in the subset - belongs to the revset or not. - condrepr: a tuple of (format, obj, ...), a function or an object that - provides a printable representation of the given condition. - """ - self._subset = subset - self._condition = condition - self._condrepr = condrepr - - def __contains__(self, x): - return x in self._subset and self._condition(x) - - def __iter__(self): - return self._iterfilter(self._subset) - - def _iterfilter(self, it): - cond = self._condition - for x in it: - if cond(x): - yield x - - @property - def fastasc(self): - it = self._subset.fastasc - if it is None: - return None - return lambda: self._iterfilter(it()) - - @property - def fastdesc(self): - it = self._subset.fastdesc - if it is None: - return None - return lambda: self._iterfilter(it()) - - def __nonzero__(self): - fast = None - candidates = [self.fastasc if self.isascending() else None, - self.fastdesc if self.isdescending() else None, - self.fastasc, - self.fastdesc] - for candidate in candidates: - if candidate is not None: - fast = candidate - break - - if fast is not None: - it = fast() - else: - it = self - - for r in it: - return True - return False - - def __len__(self): - # Basic implementation to be changed in future patches. - # until this gets improved, we use generator expression - # here, since list comprehensions are free to call __len__ again - # causing infinite recursion - l = baseset(r for r in self) - return len(l) - - def sort(self, reverse=False): - self._subset.sort(reverse=reverse) - - def reverse(self): - self._subset.reverse() - - def isascending(self): - return self._subset.isascending() - - def isdescending(self): - return self._subset.isdescending() - - def istopo(self): - return self._subset.istopo() - - def first(self): - for x in self: - return x - return None - - def last(self): - it = None - if self.isascending(): - it = self.fastdesc - elif self.isdescending(): - it = self.fastasc - if it is not None: - for x in it(): - return x - return None #empty case - else: - x = None - for x in self: - pass - return x - - def __repr__(self): - xs = [repr(self._subset)] - s = _formatsetrepr(self._condrepr) - if s: - xs.append(s) - return '<%s %s>' % (type(self).__name__, ', '.join(xs)) - -def _iterordered(ascending, iter1, iter2): - """produce an ordered iteration from two iterators with the same order - - The ascending is used to indicated the iteration direction. - """ - choice = max - if ascending: - choice = min - - val1 = None - val2 = None - try: - # Consume both iterators in an ordered way until one is empty - while True: - if val1 is None: - val1 = next(iter1) - if val2 is None: - val2 = next(iter2) - n = choice(val1, val2) - yield n - if val1 == n: - val1 = None - if val2 == n: - val2 = None - except StopIteration: - # Flush any remaining values and consume the other one - it = iter2 - if val1 is not None: - yield val1 - it = iter1 - elif val2 is not None: - # might have been equality and both are empty - yield val2 - for val in it: - yield val - -class addset(abstractsmartset): - """Represent the addition of two sets - - Wrapper structure for lazily adding two structures without losing much - performance on the __contains__ method - - If the ascending attribute is set, that means the two structures are - ordered in either an ascending or descending way. Therefore, we can add - them maintaining the order by iterating over both at the same time - - >>> xs = baseset([0, 3, 2]) - >>> ys = baseset([5, 2, 4]) - - >>> rs = addset(xs, ys) - >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last() - (True, True, False, True, 0, 4) - >>> rs = addset(xs, baseset([])) - >>> bool(rs), 0 in rs, 1 in rs, rs.first(), rs.last() - (True, True, False, 0, 2) - >>> rs = addset(baseset([]), baseset([])) - >>> bool(rs), 0 in rs, rs.first(), rs.last() - (False, False, None, None) - - iterate unsorted: - >>> rs = addset(xs, ys) - >>> # (use generator because pypy could call len()) - >>> list(x for x in rs) # without _genlist - [0, 3, 2, 5, 4] - >>> assert not rs._genlist - >>> len(rs) - 5 - >>> [x for x in rs] # with _genlist - [0, 3, 2, 5, 4] - >>> assert rs._genlist - - iterate ascending: - >>> rs = addset(xs, ys, ascending=True) - >>> # (use generator because pypy could call len()) - >>> list(x for x in rs), list(x for x in rs.fastasc()) # without _asclist - ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) - >>> assert not rs._asclist - >>> len(rs) - 5 - >>> [x for x in rs], [x for x in rs.fastasc()] - ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) - >>> assert rs._asclist - - iterate descending: - >>> rs = addset(xs, ys, ascending=False) - >>> # (use generator because pypy could call len()) - >>> list(x for x in rs), list(x for x in rs.fastdesc()) # without _asclist - ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) - >>> assert not rs._asclist - >>> len(rs) - 5 - >>> [x for x in rs], [x for x in rs.fastdesc()] - ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) - >>> assert rs._asclist - - iterate ascending without fastasc: - >>> rs = addset(xs, generatorset(ys), ascending=True) - >>> assert rs.fastasc is None - >>> [x for x in rs] - [0, 2, 3, 4, 5] - - iterate descending without fastdesc: - >>> rs = addset(generatorset(xs), ys, ascending=False) - >>> assert rs.fastdesc is None - >>> [x for x in rs] - [5, 4, 3, 2, 0] - """ - def __init__(self, revs1, revs2, ascending=None): - self._r1 = revs1 - self._r2 = revs2 - self._iter = None - self._ascending = ascending - self._genlist = None - self._asclist = None - - def __len__(self): - return len(self._list) - - def __nonzero__(self): - return bool(self._r1) or bool(self._r2) - - @util.propertycache - def _list(self): - if not self._genlist: - self._genlist = baseset(iter(self)) - return self._genlist - - def __iter__(self): - """Iterate over both collections without repeating elements - - If the ascending attribute is not set, iterate over the first one and - then over the second one checking for membership on the first one so we - dont yield any duplicates. - - If the ascending attribute is set, iterate over both collections at the - same time, yielding only one value at a time in the given order. - """ - if self._ascending is None: - if self._genlist: - return iter(self._genlist) - def arbitraryordergen(): - for r in self._r1: - yield r - inr1 = self._r1.__contains__ - for r in self._r2: - if not inr1(r): - yield r - return arbitraryordergen() - # try to use our own fast iterator if it exists - self._trysetasclist() - if self._ascending: - attr = 'fastasc' - else: - attr = 'fastdesc' - it = getattr(self, attr) - if it is not None: - return it() - # maybe half of the component supports fast - # get iterator for _r1 - iter1 = getattr(self._r1, attr) - if iter1 is None: - # let's avoid side effect (not sure it matters) - iter1 = iter(sorted(self._r1, reverse=not self._ascending)) - else: - iter1 = iter1() - # get iterator for _r2 - iter2 = getattr(self._r2, attr) - if iter2 is None: - # let's avoid side effect (not sure it matters) - iter2 = iter(sorted(self._r2, reverse=not self._ascending)) - else: - iter2 = iter2() - return _iterordered(self._ascending, iter1, iter2) - - def _trysetasclist(self): - """populate the _asclist attribute if possible and necessary""" - if self._genlist is not None and self._asclist is None: - self._asclist = sorted(self._genlist) - - @property - def fastasc(self): - self._trysetasclist() - if self._asclist is not None: - return self._asclist.__iter__ - iter1 = self._r1.fastasc - iter2 = self._r2.fastasc - if None in (iter1, iter2): - return None - return lambda: _iterordered(True, iter1(), iter2()) - - @property - def fastdesc(self): - self._trysetasclist() - if self._asclist is not None: - return self._asclist.__reversed__ - iter1 = self._r1.fastdesc - iter2 = self._r2.fastdesc - if None in (iter1, iter2): - return None - return lambda: _iterordered(False, iter1(), iter2()) - - def __contains__(self, x): - return x in self._r1 or x in self._r2 - - def sort(self, reverse=False): - """Sort the added set - - For this we use the cached list with all the generated values and if we - know they are ascending or descending we can sort them in a smart way. - """ - self._ascending = not reverse - - def isascending(self): - return self._ascending is not None and self._ascending - - def isdescending(self): - return self._ascending is not None and not self._ascending - - def istopo(self): - # not worth the trouble asserting if the two sets combined are still - # in topographical order. Use the sort() predicate to explicitly sort - # again instead. - return False - - def reverse(self): - if self._ascending is None: - self._list.reverse() - else: - self._ascending = not self._ascending - - def first(self): - for x in self: - return x - return None - - def last(self): - self.reverse() - val = self.first() - self.reverse() - return val - - def __repr__(self): - d = {None: '', False: '-', True: '+'}[self._ascending] - return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2) - -class generatorset(abstractsmartset): - """Wrap a generator for lazy iteration - - Wrapper structure for generators that provides lazy membership and can - be iterated more than once. - When asked for membership it generates values until either it finds the - requested one or has gone through all the elements in the generator - """ - def __init__(self, gen, iterasc=None): - """ - gen: a generator producing the values for the generatorset. - """ - self._gen = gen - self._asclist = None - self._cache = {} - self._genlist = [] - self._finished = False - self._ascending = True - if iterasc is not None: - if iterasc: - self.fastasc = self._iterator - self.__contains__ = self._asccontains - else: - self.fastdesc = self._iterator - self.__contains__ = self._desccontains - - def __nonzero__(self): - # Do not use 'for r in self' because it will enforce the iteration - # order (default ascending), possibly unrolling a whole descending - # iterator. - if self._genlist: - return True - for r in self._consumegen(): - return True - return False - - def __contains__(self, x): - if x in self._cache: - return self._cache[x] - - # Use new values only, as existing values would be cached. - for l in self._consumegen(): - if l == x: - return True - - self._cache[x] = False - return False - - def _asccontains(self, x): - """version of contains optimised for ascending generator""" - if x in self._cache: - return self._cache[x] - - # Use new values only, as existing values would be cached. - for l in self._consumegen(): - if l == x: - return True - if l > x: - break - - self._cache[x] = False - return False - - def _desccontains(self, x): - """version of contains optimised for descending generator""" - if x in self._cache: - return self._cache[x] - - # Use new values only, as existing values would be cached. - for l in self._consumegen(): - if l == x: - return True - if l < x: - break - - self._cache[x] = False - return False - - def __iter__(self): - if self._ascending: - it = self.fastasc - else: - it = self.fastdesc - if it is not None: - return it() - # we need to consume the iterator - for x in self._consumegen(): - pass - # recall the same code - return iter(self) - - def _iterator(self): - if self._finished: - return iter(self._genlist) - - # We have to use this complex iteration strategy to allow multiple - # iterations at the same time. We need to be able to catch revision - # removed from _consumegen and added to genlist in another instance. - # - # Getting rid of it would provide an about 15% speed up on this - # iteration. - genlist = self._genlist - nextrev = self._consumegen().next - _len = len # cache global lookup - def gen(): - i = 0 - while True: - if i < _len(genlist): - yield genlist[i] - else: - yield nextrev() - i += 1 - return gen() - - def _consumegen(self): - cache = self._cache - genlist = self._genlist.append - for item in self._gen: - cache[item] = True - genlist(item) - yield item - if not self._finished: - self._finished = True - asc = self._genlist[:] - asc.sort() - self._asclist = asc - self.fastasc = asc.__iter__ - self.fastdesc = asc.__reversed__ - - def __len__(self): - for x in self._consumegen(): - pass - return len(self._genlist) - - def sort(self, reverse=False): - self._ascending = not reverse - - def reverse(self): - self._ascending = not self._ascending - - def isascending(self): - return self._ascending - - def isdescending(self): - return not self._ascending - - def istopo(self): - # not worth the trouble asserting if the two sets combined are still - # in topographical order. Use the sort() predicate to explicitly sort - # again instead. - return False - - def first(self): - if self._ascending: - it = self.fastasc - else: - it = self.fastdesc - if it is None: - # we need to consume all and try again - for x in self._consumegen(): - pass - return self.first() - return next(it(), None) - - def last(self): - if self._ascending: - it = self.fastdesc - else: - it = self.fastasc - if it is None: - # we need to consume all and try again - for x in self._consumegen(): - pass - return self.first() - return next(it(), None) - - def __repr__(self): - d = {False: '-', True: '+'}[self._ascending] - return '<%s%s>' % (type(self).__name__, d) - -class spanset(abstractsmartset): - """Duck type for baseset class which represents a range of revisions and - can work lazily and without having all the range in memory - - Note that spanset(x, y) behave almost like xrange(x, y) except for two - notable points: - - when x < y it will be automatically descending, - - revision filtered with this repoview will be skipped. - - """ - def __init__(self, repo, start=0, end=None): - """ - start: first revision included the set - (default to 0) - end: first revision excluded (last+1) - (default to len(repo) - - Spanset will be descending if `end` < `start`. - """ - if end is None: - end = len(repo) - self._ascending = start <= end - if not self._ascending: - start, end = end + 1, start +1 - self._start = start - self._end = end - self._hiddenrevs = repo.changelog.filteredrevs - - def sort(self, reverse=False): - self._ascending = not reverse - - def reverse(self): - self._ascending = not self._ascending - - def istopo(self): - # not worth the trouble asserting if the two sets combined are still - # in topographical order. Use the sort() predicate to explicitly sort - # again instead. - return False - - def _iterfilter(self, iterrange): - s = self._hiddenrevs - for r in iterrange: - if r not in s: - yield r - - def __iter__(self): - if self._ascending: - return self.fastasc() - else: - return self.fastdesc() - - def fastasc(self): - iterrange = xrange(self._start, self._end) - if self._hiddenrevs: - return self._iterfilter(iterrange) - return iter(iterrange) - - def fastdesc(self): - iterrange = xrange(self._end - 1, self._start - 1, -1) - if self._hiddenrevs: - return self._iterfilter(iterrange) - return iter(iterrange) - - def __contains__(self, rev): - hidden = self._hiddenrevs - return ((self._start <= rev < self._end) - and not (hidden and rev in hidden)) - - def __nonzero__(self): - for r in self: - return True - return False - - def __len__(self): - if not self._hiddenrevs: - return abs(self._end - self._start) - else: - count = 0 - start = self._start - end = self._end - for rev in self._hiddenrevs: - if (end < rev <= start) or (start <= rev < end): - count += 1 - return abs(self._end - self._start) - count - - def isascending(self): - return self._ascending - - def isdescending(self): - return not self._ascending - - def first(self): - if self._ascending: - it = self.fastasc - else: - it = self.fastdesc - for x in it(): - return x - return None - - def last(self): - if self._ascending: - it = self.fastdesc - else: - it = self.fastasc - for x in it(): - return x - return None - - def __repr__(self): - d = {False: '-', True: '+'}[self._ascending] - return '<%s%s %d:%d>' % (type(self).__name__, d, - self._start, self._end - 1) - -class fullreposet(spanset): - """a set containing all revisions in the repo - - This class exists to host special optimization and magic to handle virtual - revisions such as "null". - """ - - def __init__(self, repo): - super(fullreposet, self).__init__(repo) - - def __and__(self, other): - """As self contains the whole repo, all of the other set should also be - in self. Therefore `self & other = other`. - - This boldly assumes the other contains valid revs only. - """ - # other not a smartset, make is so - if not util.safehasattr(other, 'isascending'): - # filter out hidden revision - # (this boldly assumes all smartset are pure) - # - # `other` was used with "&", let's assume this is a set like - # object. - other = baseset(other - self._hiddenrevs) - - other.sort(reverse=self.isdescending()) - return other - -def prettyformatset(revs): - lines = [] - rs = repr(revs) - p = 0 - while p < len(rs): - q = rs.find('<', p + 1) - if q < 0: - q = len(rs) - l = rs.count('<', 0, p) - rs.count('>', 0, p) - assert l >= 0 - lines.append((l, rs[p:q].rstrip())) - p = q - return '\n'.join(' ' * l + s for l, s in lines) - def loadpredicate(ui, extname, registrarobj): """Load revset predicates from specified registrarobj """ diff -r b6c051cd1231 -r 1be65deb3d54 mercurial/smartset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/smartset.py Sun Oct 16 17:28:51 2016 +0900 @@ -0,0 +1,973 @@ +# smartset.py - data structure for revision set +# +# Copyright 2010 Matt Mackall +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +from . import ( + util, +) + +def _formatsetrepr(r): + """Format an optional printable representation of a set + + ======== ================================= + type(r) example + ======== ================================= + tuple ('', other) + str '' + callable lambda: '' % sorted(b) + object other + ======== ================================= + """ + if r is None: + return '' + elif isinstance(r, tuple): + return r[0] % r[1:] + elif isinstance(r, str): + return r + elif callable(r): + return r() + else: + return repr(r) + +class abstractsmartset(object): + + def __nonzero__(self): + """True if the smartset is not empty""" + raise NotImplementedError() + + def __contains__(self, rev): + """provide fast membership testing""" + raise NotImplementedError() + + def __iter__(self): + """iterate the set in the order it is supposed to be iterated""" + raise NotImplementedError() + + # Attributes containing a function to perform a fast iteration in a given + # direction. A smartset can have none, one, or both defined. + # + # Default value is None instead of a function returning None to avoid + # initializing an iterator just for testing if a fast method exists. + fastasc = None + fastdesc = None + + def isascending(self): + """True if the set will iterate in ascending order""" + raise NotImplementedError() + + def isdescending(self): + """True if the set will iterate in descending order""" + raise NotImplementedError() + + def istopo(self): + """True if the set will iterate in topographical order""" + raise NotImplementedError() + + def min(self): + """return the minimum element in the set""" + if self.fastasc is None: + v = min(self) + else: + for v in self.fastasc(): + break + else: + raise ValueError('arg is an empty sequence') + self.min = lambda: v + return v + + def max(self): + """return the maximum element in the set""" + if self.fastdesc is None: + return max(self) + else: + for v in self.fastdesc(): + break + else: + raise ValueError('arg is an empty sequence') + self.max = lambda: v + return v + + def first(self): + """return the first element in the set (user iteration perspective) + + Return None if the set is empty""" + raise NotImplementedError() + + def last(self): + """return the last element in the set (user iteration perspective) + + Return None if the set is empty""" + raise NotImplementedError() + + def __len__(self): + """return the length of the smartsets + + This can be expensive on smartset that could be lazy otherwise.""" + raise NotImplementedError() + + def reverse(self): + """reverse the expected iteration order""" + raise NotImplementedError() + + def sort(self, reverse=True): + """get the set to iterate in an ascending or descending order""" + raise NotImplementedError() + + def __and__(self, other): + """Returns a new object with the intersection of the two collections. + + This is part of the mandatory API for smartset.""" + if isinstance(other, fullreposet): + return self + return self.filter(other.__contains__, condrepr=other, cache=False) + + def __add__(self, other): + """Returns a new object with the union of the two collections. + + This is part of the mandatory API for smartset.""" + return addset(self, other) + + def __sub__(self, other): + """Returns a new object with the substraction of the two collections. + + This is part of the mandatory API for smartset.""" + c = other.__contains__ + return self.filter(lambda r: not c(r), condrepr=('', other), + cache=False) + + def filter(self, condition, condrepr=None, cache=True): + """Returns this smartset filtered by condition as a new smartset. + + `condition` is a callable which takes a revision number and returns a + boolean. Optional `condrepr` provides a printable representation of + the given `condition`. + + This is part of the mandatory API for smartset.""" + # builtin cannot be cached. but do not needs to + if cache and util.safehasattr(condition, 'func_code'): + condition = util.cachefunc(condition) + return filteredset(self, condition, condrepr) + +class baseset(abstractsmartset): + """Basic data structure that represents a revset and contains the basic + operation that it should be able to perform. + + Every method in this class should be implemented by any smartset class. + """ + def __init__(self, data=(), datarepr=None, istopo=False): + """ + datarepr: a tuple of (format, obj, ...), a function or an object that + provides a printable representation of the given data. + """ + self._ascending = None + self._istopo = istopo + if not isinstance(data, list): + if isinstance(data, set): + self._set = data + # set has no order we pick one for stability purpose + self._ascending = True + data = list(data) + self._list = data + self._datarepr = datarepr + + @util.propertycache + def _set(self): + return set(self._list) + + @util.propertycache + def _asclist(self): + asclist = self._list[:] + asclist.sort() + return asclist + + def __iter__(self): + if self._ascending is None: + return iter(self._list) + elif self._ascending: + return iter(self._asclist) + else: + return reversed(self._asclist) + + def fastasc(self): + return iter(self._asclist) + + def fastdesc(self): + return reversed(self._asclist) + + @util.propertycache + def __contains__(self): + return self._set.__contains__ + + def __nonzero__(self): + return bool(self._list) + + def sort(self, reverse=False): + self._ascending = not bool(reverse) + self._istopo = False + + def reverse(self): + if self._ascending is None: + self._list.reverse() + else: + self._ascending = not self._ascending + self._istopo = False + + def __len__(self): + return len(self._list) + + def isascending(self): + """Returns True if the collection is ascending order, False if not. + + This is part of the mandatory API for smartset.""" + if len(self) <= 1: + return True + return self._ascending is not None and self._ascending + + def isdescending(self): + """Returns True if the collection is descending order, False if not. + + This is part of the mandatory API for smartset.""" + if len(self) <= 1: + return True + return self._ascending is not None and not self._ascending + + def istopo(self): + """Is the collection is in topographical order or not. + + This is part of the mandatory API for smartset.""" + if len(self) <= 1: + return True + return self._istopo + + def first(self): + if self: + if self._ascending is None: + return self._list[0] + elif self._ascending: + return self._asclist[0] + else: + return self._asclist[-1] + return None + + def last(self): + if self: + if self._ascending is None: + return self._list[-1] + elif self._ascending: + return self._asclist[-1] + else: + return self._asclist[0] + return None + + def __repr__(self): + d = {None: '', False: '-', True: '+'}[self._ascending] + s = _formatsetrepr(self._datarepr) + if not s: + l = self._list + # if _list has been built from a set, it might have a different + # order from one python implementation to another. + # We fallback to the sorted version for a stable output. + if self._ascending is not None: + l = self._asclist + s = repr(l) + return '<%s%s %s>' % (type(self).__name__, d, s) + +class filteredset(abstractsmartset): + """Duck type for baseset class which iterates lazily over the revisions in + the subset and contains a function which tests for membership in the + revset + """ + def __init__(self, subset, condition=lambda x: True, condrepr=None): + """ + condition: a function that decide whether a revision in the subset + belongs to the revset or not. + condrepr: a tuple of (format, obj, ...), a function or an object that + provides a printable representation of the given condition. + """ + self._subset = subset + self._condition = condition + self._condrepr = condrepr + + def __contains__(self, x): + return x in self._subset and self._condition(x) + + def __iter__(self): + return self._iterfilter(self._subset) + + def _iterfilter(self, it): + cond = self._condition + for x in it: + if cond(x): + yield x + + @property + def fastasc(self): + it = self._subset.fastasc + if it is None: + return None + return lambda: self._iterfilter(it()) + + @property + def fastdesc(self): + it = self._subset.fastdesc + if it is None: + return None + return lambda: self._iterfilter(it()) + + def __nonzero__(self): + fast = None + candidates = [self.fastasc if self.isascending() else None, + self.fastdesc if self.isdescending() else None, + self.fastasc, + self.fastdesc] + for candidate in candidates: + if candidate is not None: + fast = candidate + break + + if fast is not None: + it = fast() + else: + it = self + + for r in it: + return True + return False + + def __len__(self): + # Basic implementation to be changed in future patches. + # until this gets improved, we use generator expression + # here, since list comprehensions are free to call __len__ again + # causing infinite recursion + l = baseset(r for r in self) + return len(l) + + def sort(self, reverse=False): + self._subset.sort(reverse=reverse) + + def reverse(self): + self._subset.reverse() + + def isascending(self): + return self._subset.isascending() + + def isdescending(self): + return self._subset.isdescending() + + def istopo(self): + return self._subset.istopo() + + def first(self): + for x in self: + return x + return None + + def last(self): + it = None + if self.isascending(): + it = self.fastdesc + elif self.isdescending(): + it = self.fastasc + if it is not None: + for x in it(): + return x + return None #empty case + else: + x = None + for x in self: + pass + return x + + def __repr__(self): + xs = [repr(self._subset)] + s = _formatsetrepr(self._condrepr) + if s: + xs.append(s) + return '<%s %s>' % (type(self).__name__, ', '.join(xs)) + +def _iterordered(ascending, iter1, iter2): + """produce an ordered iteration from two iterators with the same order + + The ascending is used to indicated the iteration direction. + """ + choice = max + if ascending: + choice = min + + val1 = None + val2 = None + try: + # Consume both iterators in an ordered way until one is empty + while True: + if val1 is None: + val1 = next(iter1) + if val2 is None: + val2 = next(iter2) + n = choice(val1, val2) + yield n + if val1 == n: + val1 = None + if val2 == n: + val2 = None + except StopIteration: + # Flush any remaining values and consume the other one + it = iter2 + if val1 is not None: + yield val1 + it = iter1 + elif val2 is not None: + # might have been equality and both are empty + yield val2 + for val in it: + yield val + +class addset(abstractsmartset): + """Represent the addition of two sets + + Wrapper structure for lazily adding two structures without losing much + performance on the __contains__ method + + If the ascending attribute is set, that means the two structures are + ordered in either an ascending or descending way. Therefore, we can add + them maintaining the order by iterating over both at the same time + + >>> xs = baseset([0, 3, 2]) + >>> ys = baseset([5, 2, 4]) + + >>> rs = addset(xs, ys) + >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last() + (True, True, False, True, 0, 4) + >>> rs = addset(xs, baseset([])) + >>> bool(rs), 0 in rs, 1 in rs, rs.first(), rs.last() + (True, True, False, 0, 2) + >>> rs = addset(baseset([]), baseset([])) + >>> bool(rs), 0 in rs, rs.first(), rs.last() + (False, False, None, None) + + iterate unsorted: + >>> rs = addset(xs, ys) + >>> # (use generator because pypy could call len()) + >>> list(x for x in rs) # without _genlist + [0, 3, 2, 5, 4] + >>> assert not rs._genlist + >>> len(rs) + 5 + >>> [x for x in rs] # with _genlist + [0, 3, 2, 5, 4] + >>> assert rs._genlist + + iterate ascending: + >>> rs = addset(xs, ys, ascending=True) + >>> # (use generator because pypy could call len()) + >>> list(x for x in rs), list(x for x in rs.fastasc()) # without _asclist + ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) + >>> assert not rs._asclist + >>> len(rs) + 5 + >>> [x for x in rs], [x for x in rs.fastasc()] + ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) + >>> assert rs._asclist + + iterate descending: + >>> rs = addset(xs, ys, ascending=False) + >>> # (use generator because pypy could call len()) + >>> list(x for x in rs), list(x for x in rs.fastdesc()) # without _asclist + ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) + >>> assert not rs._asclist + >>> len(rs) + 5 + >>> [x for x in rs], [x for x in rs.fastdesc()] + ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) + >>> assert rs._asclist + + iterate ascending without fastasc: + >>> rs = addset(xs, generatorset(ys), ascending=True) + >>> assert rs.fastasc is None + >>> [x for x in rs] + [0, 2, 3, 4, 5] + + iterate descending without fastdesc: + >>> rs = addset(generatorset(xs), ys, ascending=False) + >>> assert rs.fastdesc is None + >>> [x for x in rs] + [5, 4, 3, 2, 0] + """ + def __init__(self, revs1, revs2, ascending=None): + self._r1 = revs1 + self._r2 = revs2 + self._iter = None + self._ascending = ascending + self._genlist = None + self._asclist = None + + def __len__(self): + return len(self._list) + + def __nonzero__(self): + return bool(self._r1) or bool(self._r2) + + @util.propertycache + def _list(self): + if not self._genlist: + self._genlist = baseset(iter(self)) + return self._genlist + + def __iter__(self): + """Iterate over both collections without repeating elements + + If the ascending attribute is not set, iterate over the first one and + then over the second one checking for membership on the first one so we + dont yield any duplicates. + + If the ascending attribute is set, iterate over both collections at the + same time, yielding only one value at a time in the given order. + """ + if self._ascending is None: + if self._genlist: + return iter(self._genlist) + def arbitraryordergen(): + for r in self._r1: + yield r + inr1 = self._r1.__contains__ + for r in self._r2: + if not inr1(r): + yield r + return arbitraryordergen() + # try to use our own fast iterator if it exists + self._trysetasclist() + if self._ascending: + attr = 'fastasc' + else: + attr = 'fastdesc' + it = getattr(self, attr) + if it is not None: + return it() + # maybe half of the component supports fast + # get iterator for _r1 + iter1 = getattr(self._r1, attr) + if iter1 is None: + # let's avoid side effect (not sure it matters) + iter1 = iter(sorted(self._r1, reverse=not self._ascending)) + else: + iter1 = iter1() + # get iterator for _r2 + iter2 = getattr(self._r2, attr) + if iter2 is None: + # let's avoid side effect (not sure it matters) + iter2 = iter(sorted(self._r2, reverse=not self._ascending)) + else: + iter2 = iter2() + return _iterordered(self._ascending, iter1, iter2) + + def _trysetasclist(self): + """populate the _asclist attribute if possible and necessary""" + if self._genlist is not None and self._asclist is None: + self._asclist = sorted(self._genlist) + + @property + def fastasc(self): + self._trysetasclist() + if self._asclist is not None: + return self._asclist.__iter__ + iter1 = self._r1.fastasc + iter2 = self._r2.fastasc + if None in (iter1, iter2): + return None + return lambda: _iterordered(True, iter1(), iter2()) + + @property + def fastdesc(self): + self._trysetasclist() + if self._asclist is not None: + return self._asclist.__reversed__ + iter1 = self._r1.fastdesc + iter2 = self._r2.fastdesc + if None in (iter1, iter2): + return None + return lambda: _iterordered(False, iter1(), iter2()) + + def __contains__(self, x): + return x in self._r1 or x in self._r2 + + def sort(self, reverse=False): + """Sort the added set + + For this we use the cached list with all the generated values and if we + know they are ascending or descending we can sort them in a smart way. + """ + self._ascending = not reverse + + def isascending(self): + return self._ascending is not None and self._ascending + + def isdescending(self): + return self._ascending is not None and not self._ascending + + def istopo(self): + # not worth the trouble asserting if the two sets combined are still + # in topographical order. Use the sort() predicate to explicitly sort + # again instead. + return False + + def reverse(self): + if self._ascending is None: + self._list.reverse() + else: + self._ascending = not self._ascending + + def first(self): + for x in self: + return x + return None + + def last(self): + self.reverse() + val = self.first() + self.reverse() + return val + + def __repr__(self): + d = {None: '', False: '-', True: '+'}[self._ascending] + return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2) + +class generatorset(abstractsmartset): + """Wrap a generator for lazy iteration + + Wrapper structure for generators that provides lazy membership and can + be iterated more than once. + When asked for membership it generates values until either it finds the + requested one or has gone through all the elements in the generator + """ + def __init__(self, gen, iterasc=None): + """ + gen: a generator producing the values for the generatorset. + """ + self._gen = gen + self._asclist = None + self._cache = {} + self._genlist = [] + self._finished = False + self._ascending = True + if iterasc is not None: + if iterasc: + self.fastasc = self._iterator + self.__contains__ = self._asccontains + else: + self.fastdesc = self._iterator + self.__contains__ = self._desccontains + + def __nonzero__(self): + # Do not use 'for r in self' because it will enforce the iteration + # order (default ascending), possibly unrolling a whole descending + # iterator. + if self._genlist: + return True + for r in self._consumegen(): + return True + return False + + def __contains__(self, x): + if x in self._cache: + return self._cache[x] + + # Use new values only, as existing values would be cached. + for l in self._consumegen(): + if l == x: + return True + + self._cache[x] = False + return False + + def _asccontains(self, x): + """version of contains optimised for ascending generator""" + if x in self._cache: + return self._cache[x] + + # Use new values only, as existing values would be cached. + for l in self._consumegen(): + if l == x: + return True + if l > x: + break + + self._cache[x] = False + return False + + def _desccontains(self, x): + """version of contains optimised for descending generator""" + if x in self._cache: + return self._cache[x] + + # Use new values only, as existing values would be cached. + for l in self._consumegen(): + if l == x: + return True + if l < x: + break + + self._cache[x] = False + return False + + def __iter__(self): + if self._ascending: + it = self.fastasc + else: + it = self.fastdesc + if it is not None: + return it() + # we need to consume the iterator + for x in self._consumegen(): + pass + # recall the same code + return iter(self) + + def _iterator(self): + if self._finished: + return iter(self._genlist) + + # We have to use this complex iteration strategy to allow multiple + # iterations at the same time. We need to be able to catch revision + # removed from _consumegen and added to genlist in another instance. + # + # Getting rid of it would provide an about 15% speed up on this + # iteration. + genlist = self._genlist + nextrev = self._consumegen().next + _len = len # cache global lookup + def gen(): + i = 0 + while True: + if i < _len(genlist): + yield genlist[i] + else: + yield nextrev() + i += 1 + return gen() + + def _consumegen(self): + cache = self._cache + genlist = self._genlist.append + for item in self._gen: + cache[item] = True + genlist(item) + yield item + if not self._finished: + self._finished = True + asc = self._genlist[:] + asc.sort() + self._asclist = asc + self.fastasc = asc.__iter__ + self.fastdesc = asc.__reversed__ + + def __len__(self): + for x in self._consumegen(): + pass + return len(self._genlist) + + def sort(self, reverse=False): + self._ascending = not reverse + + def reverse(self): + self._ascending = not self._ascending + + def isascending(self): + return self._ascending + + def isdescending(self): + return not self._ascending + + def istopo(self): + # not worth the trouble asserting if the two sets combined are still + # in topographical order. Use the sort() predicate to explicitly sort + # again instead. + return False + + def first(self): + if self._ascending: + it = self.fastasc + else: + it = self.fastdesc + if it is None: + # we need to consume all and try again + for x in self._consumegen(): + pass + return self.first() + return next(it(), None) + + def last(self): + if self._ascending: + it = self.fastdesc + else: + it = self.fastasc + if it is None: + # we need to consume all and try again + for x in self._consumegen(): + pass + return self.first() + return next(it(), None) + + def __repr__(self): + d = {False: '-', True: '+'}[self._ascending] + return '<%s%s>' % (type(self).__name__, d) + +class spanset(abstractsmartset): + """Duck type for baseset class which represents a range of revisions and + can work lazily and without having all the range in memory + + Note that spanset(x, y) behave almost like xrange(x, y) except for two + notable points: + - when x < y it will be automatically descending, + - revision filtered with this repoview will be skipped. + + """ + def __init__(self, repo, start=0, end=None): + """ + start: first revision included the set + (default to 0) + end: first revision excluded (last+1) + (default to len(repo) + + Spanset will be descending if `end` < `start`. + """ + if end is None: + end = len(repo) + self._ascending = start <= end + if not self._ascending: + start, end = end + 1, start +1 + self._start = start + self._end = end + self._hiddenrevs = repo.changelog.filteredrevs + + def sort(self, reverse=False): + self._ascending = not reverse + + def reverse(self): + self._ascending = not self._ascending + + def istopo(self): + # not worth the trouble asserting if the two sets combined are still + # in topographical order. Use the sort() predicate to explicitly sort + # again instead. + return False + + def _iterfilter(self, iterrange): + s = self._hiddenrevs + for r in iterrange: + if r not in s: + yield r + + def __iter__(self): + if self._ascending: + return self.fastasc() + else: + return self.fastdesc() + + def fastasc(self): + iterrange = xrange(self._start, self._end) + if self._hiddenrevs: + return self._iterfilter(iterrange) + return iter(iterrange) + + def fastdesc(self): + iterrange = xrange(self._end - 1, self._start - 1, -1) + if self._hiddenrevs: + return self._iterfilter(iterrange) + return iter(iterrange) + + def __contains__(self, rev): + hidden = self._hiddenrevs + return ((self._start <= rev < self._end) + and not (hidden and rev in hidden)) + + def __nonzero__(self): + for r in self: + return True + return False + + def __len__(self): + if not self._hiddenrevs: + return abs(self._end - self._start) + else: + count = 0 + start = self._start + end = self._end + for rev in self._hiddenrevs: + if (end < rev <= start) or (start <= rev < end): + count += 1 + return abs(self._end - self._start) - count + + def isascending(self): + return self._ascending + + def isdescending(self): + return not self._ascending + + def first(self): + if self._ascending: + it = self.fastasc + else: + it = self.fastdesc + for x in it(): + return x + return None + + def last(self): + if self._ascending: + it = self.fastdesc + else: + it = self.fastasc + for x in it(): + return x + return None + + def __repr__(self): + d = {False: '-', True: '+'}[self._ascending] + return '<%s%s %d:%d>' % (type(self).__name__, d, + self._start, self._end - 1) + +class fullreposet(spanset): + """a set containing all revisions in the repo + + This class exists to host special optimization and magic to handle virtual + revisions such as "null". + """ + + def __init__(self, repo): + super(fullreposet, self).__init__(repo) + + def __and__(self, other): + """As self contains the whole repo, all of the other set should also be + in self. Therefore `self & other = other`. + + This boldly assumes the other contains valid revs only. + """ + # other not a smartset, make is so + if not util.safehasattr(other, 'isascending'): + # filter out hidden revision + # (this boldly assumes all smartset are pure) + # + # `other` was used with "&", let's assume this is a set like + # object. + other = baseset(other - self._hiddenrevs) + + other.sort(reverse=self.isdescending()) + return other + +def prettyformat(revs): + lines = [] + rs = repr(revs) + p = 0 + while p < len(rs): + q = rs.find('<', p + 1) + if q < 0: + q = len(rs) + l = rs.count('<', 0, p) - rs.count('>', 0, p) + assert l >= 0 + lines.append((l, rs[p:q].rstrip())) + p = q + return '\n'.join(' ' * l + s for l, s in lines) diff -r b6c051cd1231 -r 1be65deb3d54 tests/test-doctest.py --- a/tests/test-doctest.py Wed Jan 25 22:39:17 2017 +0900 +++ b/tests/test-doctest.py Sun Oct 16 17:28:51 2016 +0900 @@ -29,6 +29,7 @@ testmod('mercurial.pathutil') testmod('mercurial.parser') testmod('mercurial.revset') +testmod('mercurial.smartset') testmod('mercurial.store') testmod('mercurial.subrepo') testmod('mercurial.templatefilters') diff -r b6c051cd1231 -r 1be65deb3d54 tests/test-revset.t --- a/tests/test-revset.t Wed Jan 25 22:39:17 2017 +0900 +++ b/tests/test-revset.t Sun Oct 16 17:28:51 2016 +0900 @@ -40,6 +40,7 @@ > cmdutil, > node as nodemod, > revset, + > smartset, > ) > cmdtable = {} > command = cmdutil.command(cmdtable) @@ -59,7 +60,7 @@ > func = revset.match(ui, expr, repo) > revs = func(repo) > if ui.verbose: - > ui.note("* set:\n", revset.prettyformatset(revs), "\n") + > ui.note("* set:\n", smartset.prettyformat(revs), "\n") > for c in revs: > ui.write("%s\n" % c) > EOF