shrink-revlog: improve performance: use changegroup instead of revisions
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Fri, 04 Dec 2009 15:36:13 +0100
changeset 10009 69dca8574a6a
parent 10008 08bbed8ac9b8
child 10010 2fce96916d97
shrink-revlog: improve performance: use changegroup instead of revisions

Before: real 0m23.971s
Now: real 0m4.229s

The only case where the output would be different is if the newer hg were using a different diff algorithm than the one used originally.
contrib/shrink-revlog.py
--- a/contrib/shrink-revlog.py	Fri Dec 04 07:52:30 2009 +0100
+++ b/contrib/shrink-revlog.py	Fri Dec 04 15:36:13 2009 +0100
@@ -20,6 +20,7 @@
 import sys, os, tempfile
 import optparse
 from mercurial import ui as ui_, hg, revlog, transaction, node, util
+from mercurial import changegroup
 
 def toposort(rl):
     write = sys.stdout.write
@@ -73,18 +74,23 @@
 def writerevs(r1, r2, order, tr):
     write = sys.stdout.write
     write('writing %d revs ' % len(order))
+
+    count = [0]
+    def progress(*args):
+        if count[0] % 1000 == 0:
+            write('.')
+        count[0] += 1
+
+    order = [r1.node(r) for r in order]
+
+    # this is a bit ugly, but it works
+    lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
+    unlookup = lambda x: int(x, 10)
+
     try:
-        count = 0
-        for rev in order:
-            n = r1.node(rev)
-            p1, p2 = r1.parents(n)
-            l = r1.linkrev(rev)
-            t = r1.revision(n)
-            n2 = r2.addrevision(t, tr, l, p1, p2)
-
-            if count % 1000 == 0:
-                write('.')
-            count += 1
+        group = util.chunkbuffer(r1.group(order, lookup, progress))
+        chunkiter = changegroup.chunkiter(group)
+        r2.addgroup(chunkiter, unlookup, tr)
     finally:
         write('\n')