Created a class in util called chunkbuffer that buffers reads from an
author Eric Hopper <hopper@omnifarious.org>
Sun, 04 Sep 2005 14:11:51 -0700
changeset 1199 78ceaf83f28f
parent 1198 66f7d3946109
child 1200 333de1d53846
Created a class in util called chunkbuffer that buffers reads from an iterator over strings (aka chunks). Also added to util (for future use) is a generator function that iterates over a file n bytes at a time. Lastly, localrepo was changed to use this new chunkbuffer class when reading changegroups from the local repository.
mercurial/localrepo.py
mercurial/util.py
--- a/mercurial/localrepo.py	Sat Sep 03 23:52:39 2005 -0700
+++ b/mercurial/localrepo.py	Sun Sep 04 14:11:51 2005 -0700
@@ -888,21 +888,7 @@
         return remote.addchangegroup(cg)
 
     def changegroup(self, basenodes):
-        class genread:
-            def __init__(self, generator):
-                self.g = generator
-                self.buf = ""
-            def fillbuf(self):
-                self.buf += "".join(self.g)
-
-            def read(self, l):
-                while l > len(self.buf):
-                    try:
-                        self.buf += self.g.next()
-                    except StopIteration:
-                        break
-                d, self.buf = self.buf[:l], self.buf[l:]
-                return d
+        genread = util.chunkbuffer
 
         def gengroup():
             nodes = self.newer(basenodes)
--- a/mercurial/util.py	Sat Sep 03 23:52:39 2005 -0700
+++ b/mercurial/util.py	Sun Sep 04 14:11:51 2005 -0700
@@ -12,7 +12,7 @@
 
 import os, errno
 from demandload import *
-demandload(globals(), "re")
+demandload(globals(), "re cStringIO")
 
 def binary(s):
     """return true if a string is binary data using diff's heuristic"""
@@ -352,3 +352,71 @@
             val = os.WSTOPSIG(code)
             return "stopped by signal %d" % val, val
         raise ValueError("invalid exit code")
+
+class chunkbuffer(object):
+    """Serve reads of arbitrary size from an iterator that yields string
+    chunks of arbitrary size, buffering the chunks in between."""
+    def __init__(self, in_iter, targetsize = 2**16):
+        """in_iter is any iterable of string chunks (it is wrapped in iter()).
+        targetsize is the buffer size to aim for; ValueError if it is <= 0."""
+        self.in_iter = iter(in_iter)
+        self.buf = ''
+        targetsize = int(targetsize)
+        if (targetsize <= 0):
+            raise ValueError("targetsize must be greater than 0, was %d" % targetsize)
+        self.targetsize = int(targetsize)
+        self.iterempty = False  # set once in_iter is known to be exhausted
+    def fillbuf(self):
+        """x.fillbuf()
+
+        Ignore the target size and append every remaining chunk from the
+        iterator to the buffer.  Does nothing once the iterator is empty."""
+        if not self.iterempty:
+            collector = cStringIO.StringIO()
+            collector.write(self.buf)
+            for ch in self.in_iter:
+                collector.write(ch)
+            self.buf = collector.getvalue()
+            collector.close()
+            collector = None
+            self.iterempty = True
+
+    def read(self, l):
+        """x.read(l) -> str
+        Return up to l bytes, pulling more chunks from the iterator when the
+        buffer is too short.  Returns fewer bytes if the iterator runs dry."""
+        if l > len(self.buf) and not self.iterempty:
+            # Round up to the next multiple of self.targetsize above l
+            targetsize = self.targetsize * ((l // self.targetsize) + 1)
+            collector = cStringIO.StringIO()
+            collector.write(self.buf)
+            collected = len(self.buf)
+            for chunk in self.in_iter:
+                collector.write(chunk)
+                collected += len(chunk)
+                if collected >= targetsize:
+                    break
+            if collected < targetsize:
+                # the loop ended without reaching the target: iterator is dry
+                self.iterempty = True
+            self.buf = collector.getvalue()
+            collector.close()
+            collector = None
+        s = self.buf[:l]
+        self.buf = buffer(self.buf, l)  # keep the unread tail, starting at l
+        return s
+    def __repr__(self):
+        return "<%s.%s targetsize = %u buffered = %u bytes>" % \
+               (self.__class__.__module__, self.__class__.__name__,
+                self.targetsize, len(self.buf))
+
+def filechunkiter(f, size = 65536):
+    """filechunkiter(file[, size]) -> generator
+
+    Yield the data in the file up to size (default 65536) bytes at a time,
+    stopping at the first empty read.  A chunk may be shorter than size if
+    it is the last chunk in the file, or the file is a socket or some
+    other type of file that sometimes reads less data than is requested."""
+    s = f.read(size)
+    while s:  # an empty read means there is no more data
+        yield s
+        s = f.read(size)