hg: establish a cache for localrepository instances
author Gregory Szorc <gregory.szorc@gmail.com>
Sat, 22 Aug 2015 18:54:34 -0700
changeset 26219 ae33fff17c1e
parent 26218 7d45ec47c0af
child 26220 a43328baa2ac
hg: establish a cache for localrepository instances

hgweb contained code for determining whether a cached localrepository instance was up to date. This code was way too low-level to be in hgweb.

This functionality has been moved to a new "cachedlocalrepo" class in hg.py. The code has been changed slightly to facilitate use inside a class.

hgweb has been refactored to use the new API. As part of this refactor, hgweb.repo no longer exists! We're very close to using a distinct repo instance per thread.

The new cache records state when it is created. This intelligence prevents an extra localrepository from being created on the first hgweb request. This is why some redundant output from test-extension.t has gone away.
mercurial/hg.py
mercurial/hgweb/hgweb_mod.py
tests/test-extension.t
tests/test-hgweb-non-interactive.t
--- a/mercurial/hg.py	Sat Aug 22 18:15:42 2015 -0700
+++ b/mercurial/hg.py	Sat Aug 22 18:54:34 2015 -0700
@@ -823,3 +823,72 @@
         dst.setconfig('web', 'cacerts', util.expandpath(v), 'copied')
 
     return dst
+
+# Files of interest
+# Used to check if the repository has changed by looking at the mtime and
+# size of these files.
+foi = [('spath', '00changelog.i'),
+       ('spath', 'phaseroots'), # ! phase can change content at the same size
+       ('spath', 'obsstore'),
+       ('path', 'bookmarks'), # ! bookmark can change content at the same size
+      ]
+
+class cachedlocalrepo(object):
+    """Holds a localrepository that can be cached and reused."""
+
+    def __init__(self, repo):
+        """Create a new cached repo from an existing repo.
+
+        We assume the passed in repo was recently created. If the
+        repo has changed between when it was created and when it was
+        turned into a cache, it may not refresh properly.
+        """
+        assert isinstance(repo, localrepo.localrepository)
+        self._repo = repo
+        self._state, self.mtime = self._repostate()
+
+    def fetch(self):
+        """Refresh (if necessary) and return a repository.
+
+        If the cached instance is out of date, it will be recreated
+        automatically and returned.
+
+        Returns a tuple of the repo and a boolean indicating whether a new
+        repo instance was created.
+        """
+        # We compare the mtimes and sizes of some well-known files to
+        # determine if the repo changed. This is not precise, as mtimes
+        # are susceptible to clock skew and imprecise filesystems and
+        # file content can change while maintaining the same size.
+
+        state, mtime = self._repostate()
+        if state == self._state:
+            return self._repo, False
+
+        self._repo = repository(self._repo.baseui, self._repo.url())
+        self._state = state
+        self.mtime = mtime
+
+        return self._repo, True
+
+    def _repostate(self):
+        state = []
+        maxmtime = -1
+        for attr, fname in foi:
+            prefix = getattr(self._repo, attr)
+            p = os.path.join(prefix, fname)
+            try:
+                st = os.stat(p)
+            except OSError:
+                st = os.stat(prefix)
+            state.append((st.st_mtime, st.st_size))
+            maxmtime = max(maxmtime, st.st_mtime)
+
+        return tuple(state), maxmtime
+
+    def copy(self):
+        """Obtain a copy of this class instance."""
+        c = cachedlocalrepo(self._repo)
+        c._state = self._state
+        c.mtime = self.mtime
+        return c
--- a/mercurial/hgweb/hgweb_mod.py	Sat Aug 22 18:15:42 2015 -0700
+++ b/mercurial/hgweb/hgweb_mod.py	Sat Aug 22 18:54:34 2015 -0700
@@ -25,15 +25,6 @@
     'pushkey': 'push',
 }
 
-## Files of interest
-# Used to check if the repository has changed looking at mtime and size of
-# theses files. This should probably be relocated a bit higher in core.
-foi = [('spath', '00changelog.i'),
-       ('spath', 'phaseroots'), # ! phase can change content at the same size
-       ('spath', 'obsstore'),
-       ('path', 'bookmarks'), # ! bookmark can change content at the same size
-      ]
-
 def makebreadcrumb(url, prefix=''):
     '''Return a 'URL breadcrumb' list
 
@@ -66,8 +57,8 @@
     is prone to race conditions. Instances of this class exist to hold
     mutable and race-free state for requests.
     """
-    def __init__(self, app):
-        self.repo = app.repo
+    def __init__(self, app, repo):
+        self.repo = repo
         self.reponame = app.reponame
 
         self.archives = ('zip', 'gz', 'bz2')
@@ -217,10 +208,8 @@
         # break some wsgi implementation.
         r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
         r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
-        self.repo = self._webifyrepo(r)
+        self._repo = hg.cachedlocalrepo(self._webifyrepo(r))
         hook.redirect(True)
-        self.repostate = None
-        self.mtime = -1
         self.reponame = name
 
     def _webifyrepo(self, repo):
@@ -228,25 +217,13 @@
         self.websubtable = webutil.getwebsubs(repo)
         return repo
 
-    def refresh(self):
-        repostate = []
-        mtime = 0
-        # file of interrests mtime and size
-        for meth, fname in foi:
-            prefix = getattr(self.repo, meth)
-            st = get_stat(prefix, fname)
-            repostate.append((st.st_mtime, st.st_size))
-            mtime = max(mtime, st.st_mtime)
-        repostate = tuple(repostate)
-        # we need to compare file size in addition to mtime to catch
-        # changes made less than a second ago
-        if repostate != self.repostate:
-            r = hg.repository(self.repo.baseui, self.repo.url())
-            self.repo = self._webifyrepo(r)
-            # update these last to avoid threads seeing empty settings
-            self.repostate = repostate
-            # mtime is needed for ETag
-            self.mtime = mtime
+    def _getrepo(self):
+        r, created = self._repo.fetch()
+        if created:
+            r = self._webifyrepo(r)
+
+        self.mtime = self._repo.mtime
+        return r
 
     def run(self):
         """Start a server from CGI environment.
@@ -274,8 +251,8 @@
         This is typically only called by Mercurial. External consumers
         should be using instances of this class as the WSGI application.
         """
-        self.refresh()
-        rctx = requestcontext(self)
+        repo = self._getrepo()
+        rctx = requestcontext(self, repo)
 
         # This state is global across all threads.
         encoding.encoding = rctx.config('web', 'encoding', encoding.encoding)
--- a/tests/test-extension.t	Sat Aug 22 18:15:42 2015 -0700
+++ b/tests/test-extension.t	Sat Aug 22 18:54:34 2015 -0700
@@ -115,8 +115,6 @@
   3) bar extsetup
   4) foo reposetup
   4) bar reposetup
-  4) foo reposetup
-  4) bar reposetup
 
   $ echo 'foo = !' >> $HGRCPATH
   $ echo 'bar = !' >> $HGRCPATH
--- a/tests/test-hgweb-non-interactive.t	Sat Aug 22 18:15:42 2015 -0700
+++ b/tests/test-hgweb-non-interactive.t	Sat Aug 22 18:54:34 2015 -0700
@@ -64,7 +64,7 @@
   > print '---- OS.ENVIRON wsgi variables'
   > print sorted([x for x in os.environ if x.startswith('wsgi')])
   > print '---- request.ENVIRON wsgi variables'
-  > print sorted([x for x in i.repo.ui.environ if x.startswith('wsgi')])
+  > print sorted([x for x in i._getrepo().ui.environ if x.startswith('wsgi')])
   > EOF
   $ python request.py
   ---- STATUS