mercurial/dagop.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Wed, 27 Mar 2019 18:35:59 +0100
changeset 42044 bb271ec2fbfb
parent 41533 0f64091cc851
child 42057 566daffc607d
permissions -rw-r--r--
compression: introduce a `storage.revlog.zstd.level` configuration This option control the zstd compression level used when compressing revlog chunk. The usage of zstd for revlog compression has not graduated from experimental yet, but we intend to fix that soon. The option name for the compression level is more straight forward to pick, so this changesets comes first. Having a dedicated option for each compression engine is useful because they don't support the same range of values. I ran the same measurement as for the zlib compression level (in the parent changesets). The variation in repository size is stay mostly in the same (small) range. The "read/write" performance see smallish variation, but are overall much better than zlib. Write performance show the same tend of having better write performance for when reaching high-end compression. Again, we don't intend to change the default zstd compression level (currently: 3) in this series. However this is worth investigating in the future. The Performance comparison of zlib vs zstd is quite impressive. The repository size stay in the same range, but the performance are much better in all situations. Comparison summary ================== We are looking at: - performance range for zlib - performance range for zstd - comparison of default zstd (level-3) to default zlib (level 6) - comparison of the slowest zstd time to the fastest zlib time Read performance: ----------------- | zlib | zstd | cmp | f2s mercurial | 0.170159 - 0.189219 | 0.144127 - 0.149624 | 80% | 88% pypy | 2.679217 - 2.768691 | 1.532317 - 1.705044 | 60% | 63% netbeans | 122.477027 - 141.620281 | 72.996346 - 89.731560 | 58% | 73% mozilla | 147.867662 - 170.572118 | 91.700995 - 105.853099 | 56% | 71% Write performance: ------------------ | zlib | zstd | cmp | f2s mercurial | 53.250304 - 56.2936129 | 40.877025 - 45.677286 | 75% | 86% pypy | 460.721984 - 476.589918 | 270.545409 - 301.002219 | 63% | 65% netbeans | 520.560316 - 715.930400 | 370.356311 - 428.329652 | 55% | 82% mozilla | 739.803002 - 987.056093 | 505.152906 - 591.930683 | 57% | 80% Raw data -------- repo alg lvl .hg/store size 00manifest.d read write mercurial zlib 1 49,402,813 5,963,475 0.170159 53.250304 mercurial zlib 6 47,197,397 5,875,730 0.182820 56.264320 mercurial zlib 9 47,121,596 5,849,781 0.189219 56.293612 mercurial zstd 1 49,737,084 5,966,355 0.144127 40.877025 mercurial zstd 3 48,961,867 5,895,208 0.146376 42.268142 mercurial zstd 5 48,200,592 5,938,676 0.149624 43.162875 mercurial zstd 10 47,833,520 5,913,353 0.145185 44.012489 mercurial zstd 15 47,314,604 5,728,679 0.147686 45.677286 mercurial zstd 20 47,330,502 5,830,539 0.145789 45.025407 mercurial zstd 22 47,330,076 5,830,539 0.143996 44.690460 pypy zlib 1 370,830,572 28,462,425 2.679217 460.721984 pypy zlib 6 340,112,317 27,648,747 2.768691 467.537158 pypy zlib 9 338,360,736 27,639,003 2.763495 476.589918 pypy zstd 1 362,377,479 27,916,214 1.532317 270.545409 pypy zstd 3 354,137,693 27,905,988 1.686718 294.951509 pypy zstd 5 342,640,043 27,655,774 1.705044 301.002219 pypy zstd 10 334,224,327 27,164,493 1.567287 285.186239 pypy zstd 15 329,000,363 26,645,965 1.637729 299.561332 pypy zstd 20 324,534,039 26,199,547 1.526813 302.149827 pypy zstd 22 324,530,595 26,198,932 1.525718 307.821218 netbeans zlib 1 1,281,847,810 165,495,457 122.477027 520.560316 netbeans zlib 6 1,205,284,353 159,161,207 139.876147 715.930400 netbeans zlib 9 1,197,135,671 155,034,586 141.620281 678.297064 netbeans zstd 1 1,259,581,737 160,840,613 72.996346 370.356311 netbeans zstd 3 1,232,978,122 157,691,551 81.622317 396.733087 netbeans zstd 5 1,208,034,075 160,246,880 83.080549 364.342626 netbeans zstd 10 1,188,624,176 156,083,417 79.323935 403.594602 netbeans zstd 15 1,176,973,589 153,859,477 89.731560 428.329652 netbeans zstd 20 1,162,958,258 151,147,535 82.842667 392.335349 netbeans zstd 22 1,162,707,029 151,150,220 82.565695 402.840655 mozilla zlib 1 2,775,497,186 298,527,987 147.867662 751.263721 mozilla zlib 6 2,596,856,420 286,597,671 170.572118 987.056093 mozilla zlib 9 2,587,542,494 287,018,264 163.622338 739.803002 mozilla zstd 1 2,723,159,348 286,617,532 91.700995 570.042751 mozilla zstd 3 2,665,055,001 286,152,013 95.240155 561.412805 mozilla zstd 5 2,607,819,817 288,060,030 101.978048 505.152906 mozilla zstd 10 2,558,761,085 283,967,648 104.113481 497.771202 mozilla zstd 15 2,526,216,060 275,581,300 105.853099 591.930683 mozilla zstd 20 2,485,114,806 266,478,859 95.268795 576.515389 mozilla zstd 22 2,484,869,080 266,456,505 94.429282 572.785537

# dagop.py - graph ancestry and topology algorithm for revset
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import heapq

from .node import (
    nullrev,
)
from .thirdparty import (
    attr,
)
from . import (
    error,
    mdiff,
    node,
    patch,
    pycompat,
    smartset,
)

baseset = smartset.baseset
generatorset = smartset.generatorset

# possible maximum depth between null and wdir()
maxlogdepth = 0x80000000

def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
    """Walk DAG using 'pfunc' from the given 'revs' nodes

    'pfunc(rev)' should return the parent/child revisions of the given 'rev'
    if 'reverse' is True/False respectively.

    Scan ends at the stopdepth (exlusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    if startdepth is None:
        startdepth = 0
    if stopdepth is None:
        stopdepth = maxlogdepth
    if stopdepth == 0:
        return
    if stopdepth < 0:
        raise error.ProgrammingError('negative stopdepth')
    if reverse:
        heapsign = -1  # max heap
    else:
        heapsign = +1  # min heap

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse)
    irevs = iter(revs)
    pendingheap = []  # [(heapsign * rev, depth), ...] (i.e. lower depth first)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(pendingheap, (heapsign * inputrev, 0))

    lastrev = None
    while pendingheap:
        currev, curdepth = heapq.heappop(pendingheap)
        currev = heapsign * currev
        if currev == inputrev:
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(pendingheap, (heapsign * inputrev, 0))
        # rescan parents until curdepth >= startdepth because queued entries
        # of the same revision are iterated from the lowest depth
        foundnew = (currev != lastrev)
        if foundnew and curdepth >= startdepth:
            lastrev = currev
            yield currev
        pdepth = curdepth + 1
        if foundnew and pdepth < stopdepth:
            for prev in pfunc(currev):
                if prev != node.nullrev:
                    heapq.heappush(pendingheap, (heapsign * prev, pdepth))

def filectxancestors(fctxs, followfirst=False):
    """Like filectx.ancestors(), but can walk from multiple files/revisions,
    and includes the given fctxs themselves

    Yields (rev, {fctx, ...}) pairs in descending order.
    """
    visit = {}
    visitheap = []
    def addvisit(fctx):
        rev = fctx.rev()
        if rev not in visit:
            visit[rev] = set()
            heapq.heappush(visitheap, -rev)  # max heap
        visit[rev].add(fctx)

    if followfirst:
        cut = 1
    else:
        cut = None

    for c in fctxs:
        addvisit(c)
    while visit:
        currev = -heapq.heappop(visitheap)
        curfctxs = visit.pop(currev)
        yield currev, curfctxs
        for c in curfctxs:
            for parent in c.parents()[:cut]:
                addvisit(parent)
    assert not visitheap

def filerevancestors(fctxs, followfirst=False):
    """Like filectx.ancestors(), but can walk from multiple files/revisions,
    and includes the given fctxs themselves

    Returns a smartset.
    """
    gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
    return generatorset(gen, iterasc=False)

def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
    if followfirst:
        cut = 1
    else:
        cut = None
    cl = repo.changelog
    def plainpfunc(rev):
        try:
            return cl.parentrevs(rev)[:cut]
        except error.WdirUnsupported:
            return (pctx.rev() for pctx in repo[rev].parents()[:cut])
    if cutfunc is None:
        pfunc = plainpfunc
    else:
        pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
        revs = revs.filter(lambda rev: not cutfunc(rev))
    return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)

def revancestors(repo, revs, followfirst=False, startdepth=None,
                 stopdepth=None, cutfunc=None):
    r"""Like revlog.ancestors(), but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exlusive) if specified. Revisions found
    earlier than the startdepth are omitted.

    If cutfunc is provided, it will be used to cut the traversal of the DAG.
    When cutfunc(X) returns True, the DAG traversal stops - revision X and
    X's ancestors in the traversal path will be skipped. This could be an
    optimization sometimes.

    Note: if Y is an ancestor of X, cutfunc(X) returning True does not
    necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
    return True in this case. For example,

        D     # revancestors(repo, D, cutfunc=lambda rev: rev == B)
        |\    # will include "A", because the path D -> C -> A was not cut.
        B C   # If "B" gets cut, "A" might want to be cut too.
        |/
        A
    """
    gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
                           cutfunc)
    return generatorset(gen, iterasc=False)

def _genrevdescendants(repo, revs, followfirst):
    if followfirst:
        cut = 1
    else:
        cut = None

    cl = repo.changelog
    first = revs.min()
    nullrev = node.nullrev
    if first == nullrev:
        # Are there nodes with a null first parent and a non-null
        # second one? Maybe. Do we care? Probably not.
        yield first
        for i in cl:
            yield i
    else:
        seen = set(revs)
        for i in cl.revs(first):
            if i in seen:
                yield i
                continue
            for x in cl.parentrevs(i)[:cut]:
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

def _builddescendantsmap(repo, startrev, followfirst):
    """Build map of 'rev -> child revs', offset from startrev"""
    cl = repo.changelog
    nullrev = node.nullrev
    descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))]
    for currev in cl.revs(startrev + 1):
        p1rev, p2rev = cl.parentrevs(currev)
        if p1rev >= startrev:
            descmap[p1rev - startrev].append(currev)
        if not followfirst and p2rev != nullrev and p2rev >= startrev:
            descmap[p2rev - startrev].append(currev)
    return descmap

def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
    startrev = revs.min()
    descmap = _builddescendantsmap(repo, startrev, followfirst)
    def pfunc(rev):
        return descmap[rev - startrev]
    return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)

def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
    """Like revlog.descendants() but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exlusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
        gen = _genrevdescendants(repo, revs, followfirst)
    else:
        gen = _genrevdescendantsofdepth(repo, revs, followfirst,
                                        startdepth, stopdepth)
    return generatorset(gen, iterasc=True)

def descendantrevs(revs, revsfn, parentrevsfn):
    """Generate revision number descendants in revision order.

    Yields revision numbers starting with a child of some rev in
    ``revs``. Results are ordered by revision number and are
    therefore topological. Each revision is not considered a descendant
    of itself.

    ``revsfn`` is a callable that with no argument iterates over all
    revision numbers and with a ``start`` argument iterates over revision
    numbers beginning with that value.

    ``parentrevsfn`` is a callable that receives a revision number and
    returns an iterable of parent revision numbers, whose values may include
    nullrev.
    """
    first = min(revs)

    if first == nullrev:
        for rev in revsfn():
            yield rev
        return

    seen = set(revs)
    for rev in revsfn(start=first + 1):
        for prev in parentrevsfn(rev):
            if prev != nullrev and prev in seen:
                seen.add(rev)
                yield rev
                break

def _reachablerootspure(repo, minroot, roots, heads, includepath):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return []
    parentrevs = repo.changelog.parentrevs
    roots = set(roots)
    visit = list(heads)
    reachable = set()
    seen = {}
    # prefetch all the things! (because python is slow)
    reached = reachable.add
    dovisit = visit.append
    nextvisit = visit.pop
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = nextvisit()
        if rev in roots:
            reached(rev)
            if not includepath:
                continue
        parents = parentrevs(rev)
        seen[rev] = parents
        for parent in parents:
            if parent >= minroot and parent not in seen:
                dovisit(parent)
    if not reachable:
        return baseset()
    if not includepath:
        return reachable
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reached(rev)
    return reachable

def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    minroot = roots.min()
    roots = list(roots)
    heads = list(heads)
    try:
        revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
    except AttributeError:
        revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
    revs = baseset(revs)
    revs.sort()
    return revs

def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
    diffinrange = any(stype == '!' for _, stype in filteredblocks)
    return diffinrange, linerange1

def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    fctx = fctx.introfilectx()
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go futher in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead possibly topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2

def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge) and the line range
            # previously computed differs from the one we just got, we take the
            # surrounding interval. This is conservative but avoids loosing
            # information.
            if i in seen and seen[i][1] != linerange1:
                lbs, ubs = zip(linerange1, seen[i][1])
                linerange1 = min(lbs), max(ubs)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1

@attr.s(slots=True, frozen=True)
class annotateline(object):
    fctx = attr.ib()
    lineno = attr.ib()
    # Whether this annotation was the result of a skip-annotate.
    skip = attr.ib(default=False)
    text = attr.ib(default=None)

@attr.s(slots=True, frozen=True)
class _annotatedfile(object):
    # list indexed by lineno - 1
    fctxs = attr.ib()
    linenos = attr.ib()
    skips = attr.ib()
    # full file content
    text = attr.ib()

def _countlines(text):
    if text.endswith("\n"):
        return text.count("\n")
    return text.count("\n") + int(bool(text))

def _decoratelines(text, fctx):
    n = _countlines(text)
    linenos = pycompat.rangelist(1, n + 1)
    return _annotatedfile([fctx] * n, linenos, [False] * n, text)

def _annotatepair(parents, childfctx, child, skipchild, diffopts):
    r'''
    Given parent and child fctxes and annotate data for parents, for all lines
    in either parent that match the child, annotate the child with the parent's
    data.

    Additionally, if `skipchild` is True, replace all other lines with parent
    annotate data as well such that child is never blamed for any lines.

    See test-annotate.py for unit tests.
    '''
    pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
               for parent in parents]

    if skipchild:
        # Need to iterate over the blocks twice -- make it a list
        pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
    # Mercurial currently prefers p2 over p1 for annotate.
    # TODO: change this?
    for parent, blocks in pblocks:
        for (a1, a2, b1, b2), t in blocks:
            # Changed blocks ('!') or blocks made only of blank lines ('~')
            # belong to the child.
            if t == '=':
                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
                child.linenos[b1:b2] = parent.linenos[a1:a2]
                child.skips[b1:b2] = parent.skips[a1:a2]

    if skipchild:
        # Now try and match up anything that couldn't be matched,
        # Reversing pblocks maintains bias towards p2, matching above
        # behavior.
        pblocks.reverse()

        # The heuristics are:
        # * Work on blocks of changed lines (effectively diff hunks with -U0).
        # This could potentially be smarter but works well enough.
        # * For a non-matching section, do a best-effort fit. Match lines in
        #   diff hunks 1:1, dropping lines as necessary.
        # * Repeat the last line as a last resort.

        # First, replace as much as possible without repeating the last line.
        remaining = [(parent, []) for parent, _blocks in pblocks]
        for idx, (parent, blocks) in enumerate(pblocks):
            for (a1, a2, b1, b2), _t in blocks:
                if a2 - a1 >= b2 - b1:
                    for bk in pycompat.xrange(b1, b2):
                        if child.fctxs[bk] == childfctx:
                            ak = min(a1 + (bk - b1), a2 - 1)
                            child.fctxs[bk] = parent.fctxs[ak]
                            child.linenos[bk] = parent.linenos[ak]
                            child.skips[bk] = True
                else:
                    remaining[idx][1].append((a1, a2, b1, b2))

        # Then, look at anything left, which might involve repeating the last
        # line.
        for parent, blocks in remaining:
            for a1, a2, b1, b2 in blocks:
                for bk in pycompat.xrange(b1, b2):
                    if child.fctxs[bk] == childfctx:
                        ak = min(a1 + (bk - b1), a2 - 1)
                        child.fctxs[bk] = parent.fctxs[ak]
                        child.linenos[bk] = parent.linenos[ak]
                        child.skips[bk] = True
    return child

def annotate(base, parents, skiprevs=None, diffopts=None):
    """Core algorithm for filectx.annotate()

    `parents(fctx)` is a function returning a list of parent filectxs.
    """

    # This algorithm would prefer to be recursive, but Python is a
    # bit recursion-hostile. Instead we do an iterative
    # depth-first search.

    # 1st DFS pre-calculates pcache and needed
    visit = [base]
    pcache = {}
    needed = {base: 1}
    while visit:
        f = visit.pop()
        if f in pcache:
            continue
        pl = parents(f)
        pcache[f] = pl
        for p in pl:
            needed[p] = needed.get(p, 0) + 1
            if p not in pcache:
                visit.append(p)

    # 2nd DFS does the actual annotate
    visit[:] = [base]
    hist = {}
    while visit:
        f = visit[-1]
        if f in hist:
            visit.pop()
            continue

        ready = True
        pl = pcache[f]
        for p in pl:
            if p not in hist:
                ready = False
                visit.append(p)
        if ready:
            visit.pop()
            curr = _decoratelines(f.data(), f)
            skipchild = False
            if skiprevs is not None:
                skipchild = f._changeid in skiprevs
            curr = _annotatepair([hist[p] for p in pl], f, curr, skipchild,
                                 diffopts)
            for p in pl:
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

    a = hist[base]
    return [annotateline(*r) for r in zip(a.fctxs, a.linenos, a.skips,
                                          mdiff.splitnewlines(a.text))]

def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revision already emitted
    # B) revision in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
    #   available, if such subgroup exist, we add REV to it and the subgroup is
    #   now awaiting for REV.parents() to be available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revision that have been emitted. They can be considered
    # unblocked as the graph generator is already aware of them so there is no
    # need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling this
    # set will force all other branches to wait until this branch is ready to be
    # emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') correspond to parents of any revision in the
    # group ('revs') that is not itself contained in the group. The main idea
    # of this algorithm is to delay as much as possible the emission of any
    # revision.  This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this mean we properly
    # retains new branches but gives up on any special ordering for ancestors
    # of merges. The implementation can be improved to handle this better.
    #
    # The first subgroup is special. It corresponds to all the revision that
    # were already emitted. The 'revs' lists is expected to be empty and the
    # 'blocked' set contains the parents revisions of already emitted revision.
    #
    # You could pre-seed the <parents> set of groups[0] to a specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterates on pending rev until after the current rev have been
        # processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are blocked
                # on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                # revs [5, 4] waiting for 1
                # revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines their drawing. This is currently not done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroups. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given) or
            # if we emitted a whole disconnected set of the graph (reached a
            # root).  In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1:  # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r

def headrevs(revs, parentsfn):
    """Resolve the set of heads from a set of revisions.

    Receives an iterable of revision numbers and a callbable that receives a
    revision number and returns an iterable of parent revision numbers, possibly
    including nullrev.

    Returns a set of revision numbers that are DAG heads within the passed
    subset.

    ``nullrev`` is never included in the returned set, even if it is provided in
    the input set.
    """
    headrevs = set(revs)
    parents = set([node.nullrev])
    up = parents.update

    for rev in revs:
        up(parentsfn(rev))
    headrevs.difference_update(parents)
    return headrevs

def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
    """Returns the set of all revs that have no children with control.

    ``revsfn`` is a callable that with no arguments returns an iterator over
    all revision numbers in topological order. With a ``start`` argument, it
    returns revision numbers starting at that number.

    ``parentrevsfn`` is a callable receiving a revision number and returns an
    iterable of parent revision numbers, where values can include nullrev.

    ``startrev`` is a revision number at which to start the search.

    ``stoprevs`` is an iterable of revision numbers that, when encountered,
    will stop DAG traversal beyond them. Parents of revisions in this
    collection will be heads.
    """
    if startrev is None:
        startrev = nullrev

    stoprevs = set(stoprevs or [])

    reachable = {startrev}
    heads = {startrev}

    for rev in revsfn(start=startrev + 1):
        for prev in parentrevsfn(rev):
            if prev in reachable:
                if rev not in stoprevs:
                    reachable.add(rev)
                heads.add(rev)

            if prev in heads and prev not in stoprevs:
                heads.remove(prev)

    return heads

def linearize(revs, parentsfn):
    """Linearize and topologically sort a list of revisions.

    The linearization process tries to create long runs of revs where a child
    rev comes immediately after its first parent. This is done by visiting the
    heads of the revs in inverse topological order, and for each visited rev,
    visiting its second parent, then its first parent, then adding the rev
    itself to the output list.

    Returns a list of revision numbers.
    """
    visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
    finished = set()
    result = []

    while visit:
        rev = visit.pop()
        if rev < 0:
            rev = -rev - 1

            if rev not in finished:
                result.append(rev)
                finished.add(rev)

        else:
            visit.append(-rev - 1)

            for prev in parentsfn(rev):
                if prev == node.nullrev or prev not in revs or prev in finished:
                    continue

                visit.append(prev)

    assert len(result) == len(revs)

    return result