make incoming work via ssh (issue139); move chunk code into separate module.
authorThomas Arendsen Hein <thomas@intevation.de>
Tue, 21 Mar 2006 11:47:21 +0100
changeset 1981 736b6c96bbbc
parent 1980 dfb796786337
child 1982 70ba0c86da8b
make incoming work via ssh (issue139); move chunk code into separate module. Incoming ssh needs to detect the end of the changegroup, otherwise it would block trying to read from the ssh pipe. This is done by parsing the changegroup chunks. bundlerepo.getchunk() already is identical to localrepo.addchangegroup.getchunk(), which is followed by getgroup which looks much like what you can re-use in bundlerepository.__init__() and in write_bundle(). bundlerevlog.__init__.genchunk() looks very similar, too, as do some while loops in localrepo.py. Applied patch from Benoit Boissinot to move duplicate/related code to mercurial/changegroup.py and use this to fix incoming ssh.
mercurial/bundlerepo.py
mercurial/changegroup.py
mercurial/commands.py
mercurial/localrepo.py
mercurial/revlog.py
--- a/mercurial/bundlerepo.py	Tue Mar 21 06:03:33 2006 +0100
+++ b/mercurial/bundlerepo.py	Tue Mar 21 11:47:21 2006 +0100
@@ -13,25 +13,10 @@
 from node import *
 from i18n import gettext as _
 from demandload import demandload
-demandload(globals(), "util os struct")
+demandload(globals(), "changegroup util os struct")
 
 import localrepo, changelog, manifest, filelog, revlog
 
-def getchunk(source):
-    """get a chunk from a group"""
-    d = source.read(4)
-    if not d:
-        return ""
-    l = struct.unpack(">l", d)[0]
-    if l <= 4:
-        return ""
-    d = source.read(l - 4)
-    if len(d) < l - 4:
-        raise util.Abort(_("premature EOF reading chunk"
-                           " (got %d bytes, expected %d)")
-                          % (len(d), l - 4))
-    return d
-
 class bundlerevlog(revlog.revlog):
     def __init__(self, opener, indexfile, datafile, bundlefile,
                  linkmapper=None):
@@ -46,16 +31,13 @@
         #
         revlog.revlog.__init__(self, opener, indexfile, datafile)
         self.bundlefile = bundlefile
-        def genchunk():
-            while 1:
+        def chunkpositer():
+            for chunk in changegroup.chunkiter(bundlefile):
                 pos = bundlefile.tell()
-                chunk = getchunk(bundlefile)
-                if not chunk:
-                    break
-                yield chunk, pos + 4 # XXX struct.calcsize(">l") == 4
+                yield chunk, pos - len(chunk)
         n = self.count()
         prev = None
-        for chunk, start in genchunk():
+        for chunk, start in chunkpositer():
             size = len(chunk)
             if size < 80:
                 raise util.Abort("invalid changegroup")
@@ -194,12 +176,12 @@
         # dict with the mapping 'filename' -> position in the bundle
         self.bundlefilespos = {}
         while 1:
-                f = getchunk(self.bundlefile)
-                if not f:
-                    break
-                self.bundlefilespos[f] = self.bundlefile.tell()
-                while getchunk(self.bundlefile):
-                    pass
+            f = changegroup.getchunk(self.bundlefile)
+            if not f:
+                break
+            self.bundlefilespos[f] = self.bundlefile.tell()
+            for c in changegroup.chunkiter(self.bundlefile):
+                pass
 
     def dev(self):
         return -1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/changegroup.py	Tue Mar 21 11:47:21 2006 +0100
@@ -0,0 +1,43 @@
+"""
+changegroup.py - Mercurial changegroup manipulation functions
+
+ Copyright 2006 Matt Mackall <mpm@selenic.com>
+
+This software may be used and distributed according to the terms
+of the GNU General Public License, incorporated herein by reference.
+"""
+import struct
+from demandload import *
+demandload(globals(), "util")
+
+def getchunk(source):
+    """get a chunk from a changegroup"""
+    d = source.read(4)
+    if not d:
+        return ""
+    l = struct.unpack(">l", d)[0]
+    if l <= 4:
+        return ""
+    d = source.read(l - 4)
+    if len(d) < l - 4:
+        raise util.Abort(_("premature EOF reading chunk"
+                           " (got %d bytes, expected %d)")
+                          % (len(d), l - 4))
+    return d
+
+def chunkiter(source):
+    """iterate through the chunks in source"""
+    while 1:
+        c = getchunk(source)
+        if not c:
+            break
+        yield c
+
+def genchunk(data):
+    """build a changegroup chunk"""
+    header = struct.pack(">l", len(data)+ 4)
+    return "%s%s" % (header, data)
+
+def closechunk():
+    return struct.pack(">l", 0)
+
--- a/mercurial/commands.py	Tue Mar 21 06:03:33 2006 +0100
+++ b/mercurial/commands.py	Tue Mar 21 11:47:21 2006 +0100
@@ -12,6 +12,7 @@
 demandload(globals(), "fancyopts ui hg util lock revlog templater bundlerepo")
 demandload(globals(), "fnmatch hgweb mdiff random signal tempfile time")
 demandload(globals(), "traceback errno socket version struct atexit sets bz2")
+demandload(globals(), "changegroup")
 
 class UnknownCommand(Exception):
     """Exception raised if command is not in the command table."""
@@ -306,11 +307,17 @@
         else:
             fh.write("HG10UN")
             z = nocompress()
-        while 1:
-            chunk = cg.read(4096)
-            if not chunk:
-                break
-            fh.write(z.compress(chunk))
+        # parse the changegroup data, otherwise we will block
+        # in case of sshrepo because we don't know the end of the stream
+
+        # an empty chunkiter is the end of the changegroup
+        empty = False
+        while not empty:
+            empty = True
+            for chunk in changegroup.chunkiter(cg):
+                empty = False
+                fh.write(z.compress(changegroup.genchunk(chunk)))
+            fh.write(z.compress(changegroup.closechunk()))
         fh.write(z.flush())
         cleanup = None
         return filename
--- a/mercurial/localrepo.py	Tue Mar 21 06:03:33 2006 +0100
+++ b/mercurial/localrepo.py	Tue Mar 21 11:47:21 2006 +0100
@@ -5,12 +5,13 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
-import struct, os, util
+import os, util
 import filelog, manifest, changelog, dirstate, repo
 from node import *
 from i18n import gettext as _
 from demandload import *
 demandload(globals(), "re lock transaction tempfile stat mdiff errno ui")
+demandload(globals(), "changegroup")
 
 class localrepository(object):
     def __del__(self):
@@ -1244,7 +1245,7 @@
                 # If any filenodes are left, generate the group for them,
                 # otherwise don't bother.
                 if len(msng_filenode_lst) > 0:
-                    yield struct.pack(">l", len(fname) + 4) + fname
+                    yield changegroup.genchunk(fname)
                     # Sort the filenodes by their revision #
                     msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
                     # Create a group generator and only pass in a changenode
@@ -1258,7 +1259,7 @@
                     # Don't need this anymore, toss it to free memory.
                     del msng_filenode_set[fname]
             # Signal that no more groups are left.
-            yield struct.pack(">l", 0)
+            yield changegroup.closechunk()
 
             self.hook('outgoing', node=hex(msng_cl_lst[0]), source=source)
 
@@ -1318,39 +1319,18 @@
                 nodeiter = gennodelst(filerevlog)
                 nodeiter = list(nodeiter)
                 if nodeiter:
-                    yield struct.pack(">l", len(fname) + 4) + fname
+                    yield changegroup.genchunk(fname)
                     lookup = lookuprevlink_func(filerevlog)
                     for chnk in filerevlog.group(nodeiter, lookup):
                         yield chnk
 
-            yield struct.pack(">l", 0)
+            yield changegroup.closechunk()
             self.hook('outgoing', node=hex(nodes[0]), source=source)
 
         return util.chunkbuffer(gengroup())
 
     def addchangegroup(self, source):
 
-        def getchunk():
-            d = source.read(4)
-            if not d:
-                return ""
-            l = struct.unpack(">l", d)[0]
-            if l <= 4:
-                return ""
-            d = source.read(l - 4)
-            if len(d) < l - 4:
-                raise repo.RepoError(_("premature EOF reading chunk"
-                                       " (got %d bytes, expected %d)")
-                                     % (len(d), l - 4))
-            return d
-
-        def getgroup():
-            while 1:
-                c = getchunk()
-                if not c:
-                    break
-                yield c
-
         def csmap(x):
             self.ui.debug(_("add changeset %s\n") % short(x))
             return self.changelog.count()
@@ -1372,7 +1352,8 @@
         # pull off the changeset group
         self.ui.status(_("adding changesets\n"))
         co = self.changelog.tip()
-        cn = self.changelog.addgroup(getgroup(), csmap, tr, 1) # unique
+        chunkiter = changegroup.chunkiter(source)
+        cn = self.changelog.addgroup(chunkiter, csmap, tr, 1) # unique
         cnr, cor = map(self.changelog.rev, (cn, co))
         if cn == nullid:
             cnr = cor
@@ -1381,18 +1362,20 @@
         # pull off the manifest group
         self.ui.status(_("adding manifests\n"))
         mm = self.manifest.tip()
-        mo = self.manifest.addgroup(getgroup(), revmap, tr)
+        chunkiter = changegroup.chunkiter(source)
+        mo = self.manifest.addgroup(chunkiter, revmap, tr)
 
         # process the files
         self.ui.status(_("adding file changes\n"))
         while 1:
-            f = getchunk()
+            f = changegroup.getchunk(source)
             if not f:
                 break
             self.ui.debug(_("adding %s revisions\n") % f)
             fl = self.file(f)
             o = fl.count()
-            n = fl.addgroup(getgroup(), revmap, tr)
+            chunkiter = changegroup.chunkiter(source)
+            n = fl.addgroup(chunkiter, revmap, tr)
             revisions += fl.count() - o
             files += 1
 
--- a/mercurial/revlog.py	Tue Mar 21 06:03:33 2006 +0100
+++ b/mercurial/revlog.py	Tue Mar 21 11:47:21 2006 +0100
@@ -13,7 +13,8 @@
 from node import *
 from i18n import gettext as _
 from demandload import demandload
-demandload(globals(), "binascii errno heapq mdiff os sha struct zlib")
+demandload(globals(), "binascii changegroup errno heapq mdiff os")
+demandload(globals(), "sha struct zlib")
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
@@ -708,7 +709,7 @@
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
-            yield struct.pack(">l", 0)
+            yield changegroup.closechunk()
             return
 
         # add the parent of the first rev
@@ -726,12 +727,9 @@
             d = self.revdiff(a, b)
             p = self.parents(nb)
             meta = nb + p[0] + p[1] + lookup(nb)
-            l = struct.pack(">l", len(meta) + len(d) + 4)
-            yield l
-            yield meta
-            yield d
+            yield changegroup.genchunk("%s%s" % (meta, d))
 
-        yield struct.pack(">l", 0)
+        yield changegroup.closechunk()
 
     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """