revlog: remove lazy index
authorMatt Mackall <mpm@selenic.com>
Tue, 04 Jan 2011 14:12:52 -0600
changeset 13253 61c9bc3da402
parent 13252 9f6afc288702
child 13254 5ef5eb1f3515
revlog: remove lazy index
contrib/perf.py
mercurial/localrepo.py
mercurial/pure/parsers.py
mercurial/revlog.py
mercurial/statichttprepo.py
tests/test-parseindex2.py
--- a/contrib/perf.py	Tue Jan 11 14:58:17 2011 -0600
+++ b/contrib/perf.py	Tue Jan 04 14:12:52 2011 -0600
@@ -83,8 +83,7 @@
     import mercurial.changelog
     def d():
         t = repo.changelog.tip()
-        repo.changelog = mercurial.changelog.changelog(repo.sopener)
-        repo.changelog._loadindexmap()
+        repo.invalidate()
     timer(d)
 
 def perfstartup(ui, repo):
--- a/mercurial/localrepo.py	Tue Jan 11 14:58:17 2011 -0600
+++ b/mercurial/localrepo.py	Tue Jan 04 14:12:52 2011 -0600
@@ -1412,9 +1412,6 @@
         # Nor do we know which filenodes are missing.
         msng_filenode_set = {}
 
-        junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
-        junk = None
-
         # A changeset always belongs to itself, so the changenode lookup
         # function for a changenode is identity.
         def identity(x):
--- a/mercurial/pure/parsers.py	Tue Jan 11 14:58:17 2011 -0600
+++ b/mercurial/pure/parsers.py	Tue Jan 04 14:12:52 2011 -0600
@@ -38,7 +38,7 @@
     cache = None
     nodemap = {nullid: nullrev}
     n = off = 0
-    # if we're not using lazymap, always read the whole index
+
     l = len(data) - s
     append = index.append
     if inline:
--- a/mercurial/revlog.py	Tue Jan 11 14:58:17 2011 -0600
+++ b/mercurial/revlog.py	Tue Jan 04 14:12:52 2011 -0600
@@ -38,11 +38,9 @@
 REVIDX_PUNCHED_FLAG = 2
 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
 
-# amount of data read unconditionally, should be >= 4
-# when not inline: threshold for using lazy index
-_prereadsize = 1048576
 # max size of revlog with inline data
 _maxinline = 131072
+_chunksize = 1048576
 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
@@ -121,209 +119,6 @@
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)
 
-class lazyparser(object):
-    """
-    this class avoids the need to parse the entirety of large indices
-    """
-
-    # lazyparser is not safe to use on windows if win32 extensions not
-    # available. it keeps file handle open, which make it not possible
-    # to break hardlinks on local cloned repos.
-
-    def __init__(self, dataf):
-        try:
-            size = util.fstat(dataf).st_size
-        except AttributeError:
-            size = 0
-        self.dataf = dataf
-        self.s = struct.calcsize(indexformatng)
-        self.datasize = size
-        self.l = size // self.s
-        self.index = [None] * self.l
-        self.map = {nullid: nullrev}
-        self.allmap = 0
-        self.all = 0
-        self.mapfind_count = 0
-
-    def loadmap(self):
-        """
-        during a commit, we need to make sure the rev being added is
-        not a duplicate.  This requires loading the entire index,
-        which is fairly slow.  loadmap can load up just the node map,
-        which takes much less time.
-        """
-        if self.allmap:
-            return
-        end = self.datasize
-        self.allmap = 1
-        cur = 0
-        count = 0
-        blocksize = self.s * 256
-        self.dataf.seek(0)
-        while cur < end:
-            data = self.dataf.read(blocksize)
-            off = 0
-            for x in xrange(256):
-                n = data[off + ngshaoffset:off + ngshaoffset + 20]
-                self.map[n] = count
-                count += 1
-                if count >= self.l:
-                    break
-                off += self.s
-            cur += blocksize
-
-    def loadblock(self, blockstart, blocksize, data=None):
-        if self.all:
-            return
-        if data is None:
-            self.dataf.seek(blockstart)
-            if blockstart + blocksize > self.datasize:
-                # the revlog may have grown since we've started running,
-                # but we don't have space in self.index for more entries.
-                # limit blocksize so that we don't get too much data.
-                blocksize = max(self.datasize - blockstart, 0)
-            data = self.dataf.read(blocksize)
-        lend = len(data) // self.s
-        i = blockstart // self.s
-        off = 0
-        # lazyindex supports __delitem__
-        if lend > len(self.index) - i:
-            lend = len(self.index) - i
-        for x in xrange(lend):
-            if self.index[i + x] is None:
-                b = data[off : off + self.s]
-                self.index[i + x] = b
-                n = b[ngshaoffset:ngshaoffset + 20]
-                self.map[n] = i + x
-            off += self.s
-
-    def findnode(self, node):
-        """search backwards through the index file for a specific node"""
-        if self.allmap:
-            return None
-
-        # hg log will cause many many searches for the manifest
-        # nodes.  After we get called a few times, just load the whole
-        # thing.
-        if self.mapfind_count > 8:
-            self.loadmap()
-            if node in self.map:
-                return node
-            return None
-        self.mapfind_count += 1
-        last = self.l - 1
-        while self.index[last] is not None:
-            if last == 0:
-                self.all = 1
-                self.allmap = 1
-                return None
-            last -= 1
-        end = (last + 1) * self.s
-        blocksize = self.s * 256
-        while end >= 0:
-            start = max(end - blocksize, 0)
-            self.dataf.seek(start)
-            data = self.dataf.read(end - start)
-            findend = end - start
-            while True:
-                # we're searching backwards, so we have to make sure
-                # we don't find a changeset where this node is a parent
-                off = data.find(node, 0, findend)
-                findend = off
-                if off >= 0:
-                    i = off / self.s
-                    off = i * self.s
-                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
-                    if n == node:
-                        self.map[n] = i + start / self.s
-                        return node
-                else:
-                    break
-            end -= blocksize
-        return None
-
-    def loadindex(self, i=None, end=None):
-        if self.all:
-            return
-        all = False
-        if i is None:
-            blockstart = 0
-            blocksize = (65536 / self.s) * self.s
-            end = self.datasize
-            all = True
-        else:
-            if end:
-                blockstart = i * self.s
-                end = end * self.s
-                blocksize = end - blockstart
-            else:
-                blockstart = (i & ~1023) * self.s
-                blocksize = self.s * 1024
-                end = blockstart + blocksize
-        while blockstart < end:
-            self.loadblock(blockstart, blocksize)
-            blockstart += blocksize
-        if all:
-            self.all = True
-
-class lazyindex(object):
-    """a lazy version of the index array"""
-    def __init__(self, parser):
-        self.p = parser
-    def __len__(self):
-        return len(self.p.index)
-    def load(self, pos):
-        if pos < 0:
-            pos += len(self.p.index)
-        self.p.loadindex(pos)
-        return self.p.index[pos]
-    def __getitem__(self, pos):
-        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
-    def __setitem__(self, pos, item):
-        self.p.index[pos] = _pack(indexformatng, *item)
-    def __delitem__(self, pos):
-        del self.p.index[pos]
-    def insert(self, pos, e):
-        self.p.index.insert(pos, _pack(indexformatng, *e))
-    def append(self, e):
-        self.p.index.append(_pack(indexformatng, *e))
-
-class lazymap(object):
-    """a lazy version of the node map"""
-    def __init__(self, parser):
-        self.p = parser
-    def load(self, key):
-        n = self.p.findnode(key)
-        if n is None:
-            raise KeyError(key)
-    def __contains__(self, key):
-        if key in self.p.map:
-            return True
-        self.p.loadmap()
-        return key in self.p.map
-    def __iter__(self):
-        yield nullid
-        for i, ret in enumerate(self.p.index):
-            if not ret:
-                self.p.loadindex(i)
-                ret = self.p.index[i]
-            if isinstance(ret, str):
-                ret = _unpack(indexformatng, ret)
-            yield ret[7]
-    def __getitem__(self, key):
-        try:
-            return self.p.map[key]
-        except KeyError:
-            try:
-                self.load(key)
-                return self.p.map[key]
-            except KeyError:
-                raise KeyError("node " + hex(key))
-    def __setitem__(self, key, val):
-        self.p.map[key] = val
-    def __delitem__(self, key):
-        del self.p.map[key]
-
 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56
 
@@ -336,8 +131,6 @@
         index = []
         nodemap =  {nullid: nullrev}
         n = off = 0
-        if len(data) == _prereadsize:
-            data += fp.read() # read the rest
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
@@ -378,20 +171,6 @@
         self.size = struct.calcsize(indexformatng)
 
     def parseindex(self, fp, data, inline):
-        if len(data) == _prereadsize:
-            if util.openhardlinks() and not inline:
-                # big index, let's parse it on demand
-                parser = lazyparser(fp)
-                index = lazyindex(parser)
-                nodemap = lazymap(parser)
-                e = list(index[0])
-                type = gettype(e[0])
-                e[0] = offset_type(0, type)
-                index[0] = e
-                return index, nodemap, None
-            else:
-                data += fp.read()
-
         # call the C implementation to parse the index data
         index, nodemap, cache = parsers.parse_index(data, inline)
         return index, nodemap, cache
@@ -458,10 +237,7 @@
         i = ''
         try:
             f = self.opener(self.indexfile)
-            if "nonlazy" in getattr(self.opener, 'options', {}):
-                i = f.read()
-            else:
-                i = f.read(_prereadsize)
+            i = f.read()
             if len(i) > 0:
                 v = struct.unpack(versionformat, i[:4])[0]
         except IOError, inst:
@@ -496,28 +272,9 @@
                 self._chunkclear()
 
         # add the magic null revision at -1 (if it hasn't been done already)
-        if (self.index == [] or isinstance(self.index, lazyindex) or
-            self.index[-1][7] != nullid) :
+        if self.index == [] or self.index[-1][7] != nullid:
             self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
-    def _loadindex(self, start, end):
-        """load a block of indexes all at once from the lazy parser"""
-        if isinstance(self.index, lazyindex):
-            self.index.p.loadindex(start, end)
-
-    def _loadindexmap(self):
-        """loads both the map and the index from the lazy parser"""
-        if isinstance(self.index, lazyindex):
-            p = self.index.p
-            p.loadindex()
-            self.nodemap = p.map
-
-    def _loadmap(self):
-        """loads the map from the lazy parser"""
-        if isinstance(self.nodemap, lazymap):
-            self.nodemap.p.loadmap()
-            self.nodemap = self.nodemap.p.map
-
     def tip(self):
         return self.node(len(self.index) - 2)
     def __len__(self):
@@ -978,7 +735,7 @@
     def _addchunk(self, offset, data):
         o, d = self._chunkcache
         # try to add to existing cache
-        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
+        if o + len(d) == offset and len(d) + len(data) < _chunksize:
             self._chunkcache = o, d + data
         else:
             self._chunkcache = offset, data
@@ -1060,7 +817,6 @@
                               (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
 
         # build delta chain
-        self._loadindex(base, rev + 1)
         chain = []
         index = self.index # for performance
         iterrev = rev
@@ -1413,9 +1169,6 @@
         if len(self) == 0:
             return
 
-        if isinstance(self.index, lazyindex):
-            self._loadindexmap()
-
         for rev in self:
             if self.index[rev][4] >= minlink:
                 break
--- a/mercurial/statichttprepo.py	Tue Jan 11 14:58:17 2011 -0600
+++ b/mercurial/statichttprepo.py	Tue Jan 04 14:12:52 2011 -0600
@@ -77,7 +77,6 @@
             return httprangereader(f, urlopener)
         return o
 
-    opener.options = {'nonlazy': 1}
     return opener
 
 class statichttprepository(localrepo.localrepository):
--- a/tests/test-parseindex2.py	Tue Jan 11 14:58:17 2011 -0600
+++ b/tests/test-parseindex2.py	Tue Jan 04 14:12:52 2011 -0600
@@ -21,7 +21,7 @@
     index = []
     nodemap =  {nullid: nullrev}
     n = off = 0
-    # if we're not using lazymap, always read the whole index
+
     l = len(data) - s
     append = index.append
     if inline: