pure: write a really lazy version of pure indexObject
authorMaciej Fijalkowski <fijall@gmail.com>
Sun, 24 Apr 2016 14:21:38 +0300
changeset 29133 255274719dc1
parent 29132 12769703d4ba
child 29134 8d5584d8345b
pure: write a really lazy version of pure indexObject On PyPy this version performs reasonably well compared to C version. Example command is "hg id" which gets faster, depending on details of your operating system and hard drive (it's bottlenecked on stat mostly) There is potential for improvements by storing extra as a condensed struct too.
mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py	Sat May 07 14:12:23 2016 +0100
+++ b/mercurial/pure/parsers.py	Sun Apr 24 14:21:38 2016 +0300
@@ -25,49 +25,111 @@
     # x is a tuple
     return x
 
-def parse_index2(data, inline):
-    def gettype(q):
-        return int(q & 0xFFFF)
+indexformatng = ">Qiiiiii20s12x"
+indexfirst = struct.calcsize('Q')
+sizeint = struct.calcsize('i')
+indexsize = struct.calcsize(indexformatng)
+
+def gettype(q):
+    return int(q & 0xFFFF)
 
-    def offset_type(offset, type):
-        return long(long(offset) << 16 | type)
+def offset_type(offset, type):
+    return long(long(offset) << 16 | type)
+
+class BaseIndexObject(object):
+    def __len__(self):
+        return self._lgt + len(self._extra) + 1
+
+    def insert(self, i, tup):
+        assert i == -1
+        self._extra.append(tup)
 
-    indexformatng = ">Qiiiiii20s12x"
+    def _fix_index(self, i):
+        if not isinstance(i, int):
+            raise TypeError("expecting int indexes")
+        if i < 0:
+            i = len(self) + i
+        if i < 0 or i >= len(self):
+            raise IndexError
+        return i
 
-    s = struct.calcsize(indexformatng)
-    index = []
-    cache = None
-    off = 0
+    def __getitem__(self, i):
+        i = self._fix_index(i)
+        if i == len(self) - 1:
+            return (0, 0, 0, -1, -1, -1, -1, nullid)
+        if i >= self._lgt:
+            return self._extra[i - self._lgt]
+        index = self._calculate_index(i)
+        r = struct.unpack(indexformatng, self._data[index:index + indexsize])
+        if i == 0:
+            e = list(r)
+            type = gettype(e[0])
+            e[0] = offset_type(0, type)
+            return tuple(e)
+        return r
+
+class IndexObject(BaseIndexObject):
+    def __init__(self, data):
+        assert len(data) % indexsize == 0
+        self._data = data
+        self._lgt = len(data) // indexsize
+        self._extra = []
+
+    def _calculate_index(self, i):
+        return i * indexsize
 
-    l = len(data) - s
-    append = index.append
-    if inline:
-        cache = (0, data)
-        while off <= l:
-            e = _unpack(indexformatng, data[off:off + s])
-            append(e)
-            if e[1] < 0:
-                break
-            off += e[1] + s
-    else:
-        while off <= l:
-            e = _unpack(indexformatng, data[off:off + s])
-            append(e)
-            off += s
+    def __delitem__(self, i):
+        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
+            raise ValueError("deleting slices only supports a:-1 with step 1")
+        i = self._fix_index(i.start)
+        if i < self._lgt:
+            self._data = self._data[:i * indexsize]
+            self._lgt = i
+            self._extra = []
+        else:
+            self._extra = self._extra[:i - self._lgt]
+
+class InlinedIndexObject(BaseIndexObject):
+    def __init__(self, data, inline=0):
+        self._data = data
+        self._lgt = self._inline_scan(None)
+        self._inline_scan(self._lgt)
+        self._extra = []
 
-    if off != len(data):
-        raise ValueError('corrupt index file')
+    def _inline_scan(self, lgt):
+        off = 0
+        if lgt is not None:
+            self._offsets = [0] * lgt
+        count = 0
+        while off <= len(self._data) - indexsize:
+            s, = struct.unpack('>i',
+                self._data[off + indexfirst:off + sizeint + indexfirst])
+            if lgt is not None:
+                self._offsets[count] = off
+            count += 1
+            off += indexsize + s
+        if off != len(self._data):
+            raise ValueError("corrupted data")
+        return count
 
-    if index:
-        e = list(index[0])
-        type = gettype(e[0])
-        e[0] = offset_type(0, type)
-        index[0] = tuple(e)
+    def __delitem__(self, i):
+        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
+            raise ValueError("deleting slices only supports a:-1 with step 1")
+        i = self._fix_index(i.start)
+        if i < self._lgt:
+            self._offsets = self._offsets[:i]
+            self._lgt = i
+            self._extra = []
+        else:
+            self._extra = self._extra[:i - self._lgt]
 
-    # add the magic null revision at -1
-    index.append((0, 0, 0, -1, -1, -1, -1, nullid))
+    def _calculate_index(self, i):
+        return self._offsets[i]
 
-    return index, cache
+def parse_index2(data, inline):
+    if not inline:
+        return IndexObject(data), None
+    return InlinedIndexObject(data, inline), (0, data)
 
 def parse_dirstate(dmap, copymap, st):
     parents = [st[:20], st[20: 40]]