ancestors: actually iterate over ancestors in topological order (issue5979)
authorBoris Feld <boris.feld@octobus.net>
Thu, 06 Sep 2018 17:00:28 -0400
changeset 39473 b6db2e80a9ce
parent 39472 4e4fae1dda5c
child 39474 a60dae060bc8
ancestors: actually iterate over ancestors in topological order (issue5979) This code previously used a dequeue logic, the first ancestors seen were the first ancestors to be emitted. In branching/merging situations, it can result in ancestors being yielded before their descendants, breaking the object contract. We got affected by this issue while working on the copy tracing code. At about the same time, Axel Hecht <axel@mozilla.com> reported the issue and provided the test case used in this changeset. Thanks Axel. Running `hg perfancestors` does not show a significant difference between the old and the new version.
mercurial/ancestor.py
tests/test-ancestor.py.out
tests/test-issue5979.t
tests/test-revlog-ancestry.py.out
--- a/mercurial/ancestor.py	Thu Sep 06 22:12:21 2018 +0900
+++ b/mercurial/ancestor.py	Thu Sep 06 17:00:28 2018 -0400
@@ -7,7 +7,6 @@
 
 from __future__ import absolute_import
 
-import collections
 import heapq
 
 from .node import nullrev
@@ -305,14 +304,15 @@
         """Generate the ancestors of _initrevs in reverse topological order.
 
         If inclusive is False, yield a sequence of revision numbers starting
-        with the parents of each revision in revs, i.e., each revision is *not*
-        considered an ancestor of itself.  Results are in breadth-first order:
-        parents of each rev in revs, then parents of those, etc.
+        with the parents of each revision in revs, i.e., each revision is
+        *not* considered an ancestor of itself. Results are emitted in reverse
+        revision number order. That order is also topological: a child is
+        always emitted before its parent.
 
         If inclusive is True, yield all the revs first (ignoring stoprev),
-        then yield all the ancestors of revs as when inclusive is False.
-        If an element in revs is an ancestor of a different rev it is not
-        yielded again."""
+        then yield all the ancestors of revs as when inclusive is False. If an
+        element in revs is an ancestor of a different rev it is not yielded
+        again."""
         seen = set()
         revs = self._initrevs
         if self._inclusive:
@@ -322,17 +322,27 @@
 
         parentrevs = self._parentrevs
         stoprev = self._stoprev
-        visit = collections.deque(revs)
+        visit = []
+        heapq.heapify(visit)
+        schedule = heapq.heappush
+        nextitem = heapq.heappop
+        see = seen.add
+        see(nullrev)
 
-        see = seen.add
-        schedule = visit.append
+        for r in revs:
+            for parent in parentrevs(r):
+                if parent not in seen:
+                    schedule(visit, -parent)
+                    see(parent)
 
         while visit:
-            for parent in parentrevs(visit.popleft()):
-                if parent >= stoprev and parent not in seen:
-                    schedule(parent)
-                    see(parent)
-                    yield parent
+            current = -nextitem(visit)
+            if current >= stoprev:
+                yield current
+                for parent in parentrevs(current):
+                    if parent not in seen:
+                        schedule(visit, -parent)
+                        see(parent)
 
     def __contains__(self, target):
         """Test whether target is an ancestor of self._initrevs."""
--- a/tests/test-ancestor.py.out	Thu Sep 06 22:12:21 2018 +0900
+++ b/tests/test-ancestor.py.out	Thu Sep 06 17:00:28 2018 -0400
@@ -3,16 +3,16 @@
 iteration:  []
 % lazy ancestor set for [11, 13], stoprev = 0, inclusive = False
 membership: [7, 8, 3, 4, 1, 0]
-iteration:  [3, 7, 8, 1, 4, 0, 2]
+iteration:  [8, 7, 4, 3, 2, 1, 0]
 % lazy ancestor set for [1, 3], stoprev = 0, inclusive = False
 membership: [1, 0]
-iteration:  [0, 1]
+iteration:  [1, 0]
 % lazy ancestor set for [11, 13], stoprev = 0, inclusive = True
 membership: [11, 13, 7, 8, 3, 4, 1, 0]
-iteration:  [11, 13, 3, 7, 8, 1, 4, 0, 2]
+iteration:  [11, 13, 8, 7, 4, 3, 2, 1, 0]
 % lazy ancestor set for [11, 13], stoprev = 6, inclusive = False
 membership: [7, 8]
-iteration:  [7, 8]
+iteration:  [8, 7]
 % lazy ancestor set for [11, 13], stoprev = 6, inclusive = True
 membership: [11, 13, 7, 8]
-iteration:  [11, 13, 7, 8]
+iteration:  [11, 13, 8, 7]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-issue5979.t	Thu Sep 06 17:00:28 2018 -0400
@@ -0,0 +1,34 @@
+  $ hg init r1
+  $ cd r1
+  $ hg ci --config ui.allowemptycommit=true -m c0
+  $ hg ci --config ui.allowemptycommit=true -m c1
+  $ hg ci --config ui.allowemptycommit=true -m c2
+  $ hg co -q 0
+  $ hg ci --config ui.allowemptycommit=true -m c3
+  created new head
+  $ hg co -q 3
+  $ hg merge --quiet
+  $ hg ci --config ui.allowemptycommit=true -m c4
+
+  $ hg log -G -T'{desc}'
+  @    c4
+  |\
+  | o  c3
+  | |
+  o |  c2
+  | |
+  o |  c1
+  |/
+  o  c0
+  
+
+  >>> from mercurial.ui import ui
+  >>> from mercurial.hg import repository
+  >>> repo = repository(ui())
+  >>> for anc in repo.changelog.ancestors([4], inclusive=True):
+  ...   print(anc)
+  4
+  3
+  2
+  1
+  0
--- a/tests/test-revlog-ancestry.py.out	Thu Sep 06 22:12:21 2018 +0900
+++ b/tests/test-revlog-ancestry.py.out	Thu Sep 06 17:00:28 2018 -0400
@@ -1,13 +1,13 @@
 Ancestors of 5
 4 2 0 
 Ancestors of 6 and 5
-3 4 2 1 0 
+4 3 2 1 0 
 Ancestors of 5 and 4
 4 2 0 
 Ancestors of 7, stop at 6
 6 
 Ancestors of 7, including revs
-7 6 5 3 4 2 1 0 
+7 6 5 4 3 2 1 0 
 Ancestors of 7, 5 and 3, including revs
 7 5 3 6 4 2 1 0