templater: return data in increasing chunk sizes
author Brendan Cully <brendan@kublai.com>
Fri, 21 Nov 2008 15:51:40 -0800
changeset 7396 526c40a74bd0
parent 7395 e2048f5c7bf5
child 7397 4c92d8971809
templater: return data in increasing chunk sizes

Currently hgweb is not streaming its output -- it accumulates the entire response before sending it. This patch restores streaming behaviour. To avoid having to synchronously write many tiny fragments, this patch also adds buffering to the template generator. Local testing of a fetch of a 100,000 line file with wget produces a slight slowdown overall (up from 6.5 seconds to 7.2 seconds), but instead of waiting 6 seconds for headers to arrive, output begins immediately.
mercurial/hgweb/hgweb_mod.py
mercurial/hgweb/hgwebdir_mod.py
mercurial/templater.py
mercurial/util.py
--- a/mercurial/hgweb/hgweb_mod.py	Sat Nov 22 16:57:49 2008 +0100
+++ b/mercurial/hgweb/hgweb_mod.py	Fri Nov 21 15:51:40 2008 -0800
@@ -182,20 +182,20 @@
                 content = getattr(webcommands, cmd)(self, req, tmpl)
                 req.respond(HTTP_OK, ctype)
 
-            return ''.join(content),
+            return content
 
         except revlog.LookupError, err:
             req.respond(HTTP_NOT_FOUND, ctype)
             msg = str(err)
             if 'manifest' not in msg:
                 msg = 'revision not found: %s' % err.name
-            return ''.join(tmpl('error', error=msg)),
+            return tmpl('error', error=msg)
         except (RepoError, revlog.RevlogError), inst:
             req.respond(HTTP_SERVER_ERROR, ctype)
-            return ''.join(tmpl('error', error=str(inst))),
+            return tmpl('error', error=str(inst))
         except ErrorResponse, inst:
             req.respond(inst.code, ctype)
-            return ''.join(tmpl('error', error=inst.message)),
+            return tmpl('error', error=inst.message)
 
     def templater(self, req):
 
--- a/mercurial/hgweb/hgwebdir_mod.py	Sat Nov 22 16:57:49 2008 +0100
+++ b/mercurial/hgweb/hgwebdir_mod.py	Fri Nov 21 15:51:40 2008 -0800
@@ -116,7 +116,7 @@
                 # top-level index
                 elif not virtual:
                     req.respond(HTTP_OK, ctype)
-                    return ''.join(self.makeindex(req, tmpl)),
+                    return self.makeindex(req, tmpl)
 
                 # nested indexes and hgwebs
 
@@ -138,7 +138,7 @@
                     subdir = virtual + '/'
                     if [r for r in repos if r.startswith(subdir)]:
                         req.respond(HTTP_OK, ctype)
-                        return ''.join(self.makeindex(req, tmpl, subdir)),
+                        return self.makeindex(req, tmpl, subdir)
 
                     up = virtual.rfind('/')
                     if up < 0:
@@ -147,11 +147,11 @@
 
                 # prefixes not found
                 req.respond(HTTP_NOT_FOUND, ctype)
-                return ''.join(tmpl("notfound", repo=virtual)),
+                return tmpl("notfound", repo=virtual)
 
             except ErrorResponse, err:
                 req.respond(err.code, ctype)
-                return ''.join(tmpl('error', error=err.message or '')),
+                return tmpl('error', error=err.message or '')
         finally:
             tmpl = None
 
--- a/mercurial/templater.py	Sat Nov 22 16:57:49 2008 +0100
+++ b/mercurial/templater.py	Fri Nov 21 15:51:40 2008 -0800
@@ -44,7 +44,8 @@
     template_re = re.compile(r"(?:(?:#(?=[\w\|%]+#))|(?:{(?=[\w\|%]+})))"
                              r"(\w+)(?:(?:%(\w+))|((?:\|\w+)*))[#}]")
 
-    def __init__(self, mapfile, filters={}, defaults={}, cache={}):
+    def __init__(self, mapfile, filters={}, defaults={}, cache={},
+                 minchunk=1024, maxchunk=65536):
         '''set up template engine.
         mapfile is name of file to read map definitions from.
         filters is dict of functions. each transforms a value into another.
@@ -55,6 +56,7 @@
         self.base = (mapfile and os.path.dirname(mapfile)) or ''
         self.filters = filters
         self.defaults = defaults
+        self.minchunk, self.maxchunk = minchunk, maxchunk
 
         if not mapfile:
             return
@@ -130,6 +132,13 @@
                 yield v
 
     def __call__(self, t, **map):
+        stream = self.expand(t, **map)
+        if self.minchunk:
+            stream = util.increasingchunks(stream, min=self.minchunk,
+                                           max=self.maxchunk)
+        return stream
+        
+    def expand(self, t, **map):
         '''Perform expansion. t is name of map element to expand. map contains
         added elements for use during expansion. Is a generator.'''
         tmpl = self._template(t)
--- a/mercurial/util.py	Sat Nov 22 16:57:49 2008 +0100
+++ b/mercurial/util.py	Fri Nov 21 15:51:40 2008 -0800
@@ -290,6 +290,37 @@
     l.sort()
     return l
 
+def increasingchunks(source, min=1024, max=65536):
+    '''return no less than min bytes per chunk while data remains,
+    doubling min after each chunk until it reaches max'''
+    def log2(x):
+        if not x:
+            return 0
+        i = 0
+        while x:
+            x >>= 1
+            i += 1
+        return i - 1
+
+    buf = []
+    blen = 0
+    for chunk in source:
+        buf.append(chunk)
+        blen += len(chunk)
+        if blen >= min:
+            if min < max:
+                min = min << 1
+                nmin = 1 << log2(blen)
+                if nmin > min:
+                    min = nmin
+                if min > max:
+                    min = max
+            yield ''.join(buf)
+            blen = 0
+            buf = []
+    if buf:
+        yield ''.join(buf)
+
 class Abort(Exception):
     """Raised if a command needs to print an error and exit."""