--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/fastannotate/revmap.py Mon Jul 30 22:50:00 2018 -0400
@@ -0,0 +1,254 @@
+# Copyright 2016-present Facebook. All Rights Reserved.
+#
+# revmap: trivial hg hash - linelog rev bidirectional map
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import bisect
+import os
+import struct
+
+from mercurial.node import hex
+from mercurial import (
+ error as hgerror,
+ pycompat,
+)
+from . import error
+
+# the revmap file format is straightforward:
+#
+# 8 bytes: header
+# 1 byte : flag for linelog revision 1
+# ? bytes: (optional) '\0'-terminated path string
+# only exists if (flag & renameflag) != 0
+# 20 bytes: hg hash for linelog revision 1
+# 1 byte : flag for linelog revision 2
+# ? bytes: (optional) '\0'-terminated path string
+# 20 bytes: hg hash for linelog revision 2
+# ....
+#
+# the implementation is kinda stupid: __init__ loads the whole revmap.
+# no laziness. benchmark shows loading 10000 revisions is about 0.015
+# seconds, which looks enough for our use-case. if this implementation
+# becomes a bottleneck, we can change it to lazily read the file
+# from the end.
+
+# whether the changeset is in the side branch. i.e. not in the linear main
+# branch but only got referenced by lines in merge changesets.
+sidebranchflag = 1
+
+# whether the changeset changes the file path (ie. is a rename)
+renameflag = 2
+
+# len(mercurial.node.nullid)
+_hshlen = 20
+
+class revmap(object):
+ """trivial hg bin hash - linelog rev bidirectional map
+
+ also stores a flag (uint8) for each revision, and track renames.
+ """
+
+ HEADER = b'REVMAP1\0'
+
+ def __init__(self, path=None):
+ """create or load the revmap, optionally associate to a file
+
+ if path is None, the revmap is entirely in-memory. the caller is
+ responsible for locking. concurrent writes to a same file is unsafe.
+ the caller needs to make sure one file is associated to at most one
+ revmap object at a time."""
+ self.path = path
+ self._rev2hsh = [None]
+ self._rev2flag = [None]
+ self._hsh2rev = {}
+ # since rename does not happen frequently, do not store path for every
+ # revision. self._renamerevs can be used for bisecting.
+ self._renamerevs = [0]
+ self._renamepaths = ['']
+ self._lastmaxrev = -1
+ if path:
+ if os.path.exists(path):
+ self._load()
+ else:
+ # write the header so "append" can do incremental updates
+ self.flush()
+
+ def copyfrom(self, rhs):
+ """copy the map data from another revmap. do not affect self.path"""
+ self._rev2hsh = rhs._rev2hsh[:]
+ self._rev2flag = rhs._rev2flag[:]
+ self._hsh2rev = rhs._hsh2rev.copy()
+ self._renamerevs = rhs._renamerevs[:]
+ self._renamepaths = rhs._renamepaths[:]
+ self._lastmaxrev = -1
+
+ @property
+ def maxrev(self):
+ """return max linelog revision number"""
+ return len(self._rev2hsh) - 1
+
+ def append(self, hsh, sidebranch=False, path=None, flush=False):
+ """add a binary hg hash and return the mapped linelog revision.
+ if flush is True, incrementally update the file.
+ """
+ if hsh in self._hsh2rev:
+ raise error.CorruptedFileError('%r is in revmap already' % hex(hsh))
+ if len(hsh) != _hshlen:
+ raise hgerror.ProgrammingError('hsh must be %d-char long' % _hshlen)
+ idx = len(self._rev2hsh)
+ flag = 0
+ if sidebranch:
+ flag |= sidebranchflag
+ if path is not None and path != self._renamepaths[-1]:
+ flag |= renameflag
+ self._renamerevs.append(idx)
+ self._renamepaths.append(path)
+ self._rev2hsh.append(hsh)
+ self._rev2flag.append(flag)
+ self._hsh2rev[hsh] = idx
+ if flush:
+ self.flush()
+ return idx
+
+ def rev2hsh(self, rev):
+ """convert linelog revision to hg hash. return None if not found."""
+ if rev > self.maxrev or rev < 0:
+ return None
+ return self._rev2hsh[rev]
+
+ def rev2flag(self, rev):
+ """get the flag (uint8) for a given linelog revision.
+ return None if revision does not exist.
+ """
+ if rev > self.maxrev or rev < 0:
+ return None
+ return self._rev2flag[rev]
+
+ def rev2path(self, rev):
+ """get the path for a given linelog revision.
+ return None if revision does not exist.
+ """
+ if rev > self.maxrev or rev < 0:
+ return None
+ idx = bisect.bisect_right(self._renamerevs, rev) - 1
+ return self._renamepaths[idx]
+
+ def hsh2rev(self, hsh):
+ """convert hg hash to linelog revision. return None if not found."""
+ return self._hsh2rev.get(hsh)
+
+ def clear(self, flush=False):
+ """make the map empty. if flush is True, write to disk"""
+ # rev 0 is reserved, real rev starts from 1
+ self._rev2hsh = [None]
+ self._rev2flag = [None]
+ self._hsh2rev = {}
+ self._rev2path = ['']
+ self._lastmaxrev = -1
+ if flush:
+ self.flush()
+
+ def flush(self):
+ """write the state down to the file"""
+ if not self.path:
+ return
+ if self._lastmaxrev == -1: # write the entire file
+ with open(self.path, 'wb') as f:
+ f.write(self.HEADER)
+ for i in pycompat.xrange(1, len(self._rev2hsh)):
+ self._writerev(i, f)
+ else: # append incrementally
+ with open(self.path, 'ab') as f:
+ for i in pycompat.xrange(self._lastmaxrev + 1,
+ len(self._rev2hsh)):
+ self._writerev(i, f)
+ self._lastmaxrev = self.maxrev
+
+ def _load(self):
+ """load state from file"""
+ if not self.path:
+ return
+ # use local variables in a loop. CPython uses LOAD_FAST for them,
+ # which is faster than both LOAD_CONST and LOAD_GLOBAL.
+ flaglen = 1
+ hshlen = _hshlen
+ with open(self.path, 'rb') as f:
+ if f.read(len(self.HEADER)) != self.HEADER:
+ raise error.CorruptedFileError()
+ self.clear(flush=False)
+ while True:
+ buf = f.read(flaglen)
+ if not buf:
+ break
+ flag = ord(buf)
+ rev = len(self._rev2hsh)
+ if flag & renameflag:
+ path = self._readcstr(f)
+ self._renamerevs.append(rev)
+ self._renamepaths.append(path)
+ hsh = f.read(hshlen)
+ if len(hsh) != hshlen:
+ raise error.CorruptedFileError()
+ self._hsh2rev[hsh] = rev
+ self._rev2flag.append(flag)
+ self._rev2hsh.append(hsh)
+ self._lastmaxrev = self.maxrev
+
+ def _writerev(self, rev, f):
+ """append a revision data to file"""
+ flag = self._rev2flag[rev]
+ hsh = self._rev2hsh[rev]
+ f.write(struct.pack('B', flag))
+ if flag & renameflag:
+ path = self.rev2path(rev)
+ if path is None:
+ raise error.CorruptedFileError('cannot find path for %s' % rev)
+ f.write(path + '\0')
+ f.write(hsh)
+
+ @staticmethod
+ def _readcstr(f):
+ """read a C-language-like '\0'-terminated string"""
+ buf = ''
+ while True:
+ ch = f.read(1)
+ if not ch: # unexpected eof
+ raise error.CorruptedFileError()
+ if ch == '\0':
+ break
+ buf += ch
+ return buf
+
+ def __contains__(self, f):
+ """(fctx or (node, path)) -> bool.
+ test if (node, path) is in the map, and is not in a side branch.
+ f can be either a tuple of (node, path), or a fctx.
+ """
+ if isinstance(f, tuple): # f: (node, path)
+ hsh, path = f
+ else: # f: fctx
+ hsh, path = f.node(), f.path()
+ rev = self.hsh2rev(hsh)
+ if rev is None:
+ return False
+ if path is not None and path != self.rev2path(rev):
+ return False
+ return (self.rev2flag(rev) & sidebranchflag) == 0
+
+def getlastnode(path):
+ """return the last hash in a revmap, without loading its full content.
+ this is equivalent to `m = revmap(path); m.rev2hsh(m.maxrev)`, but faster.
+ """
+ hsh = None
+ try:
+ with open(path, 'rb') as f:
+ f.seek(-_hshlen, 2)
+ if f.tell() > len(revmap.HEADER):
+ hsh = f.read(_hshlen)
+ except IOError:
+ pass
+ return hsh