mercurial/manifest.py
changeset 24225 3e5c4af69808
parent 24224 d71837d06597
child 24226 b992769dd1be
--- a/mercurial/manifest.py	Sat Mar 07 11:43:12 2015 -0500
+++ b/mercurial/manifest.py	Sat Mar 07 12:04:39 2015 -0500
@@ -9,53 +9,119 @@
 import mdiff, parsers, error, revlog, util
 import array, struct
 
-# Pure Python fallback
-def _parsemanifest(mfdict, fdict, lines):
-    bin = revlog.bin
-    for l in lines.splitlines():
-        f, n = l.split('\0')
-        if len(n) > 40:
-            fdict[f] = n[40:]
-            mfdict[f] = bin(n[:40])
-        else:
-            mfdict[f] = bin(n)
+
+class _lazymanifest(dict):
+    """This is the pure implementation of lazymanifest.
+
+    It has not been optimized *at all* and is not lazy.
+    """
 
-def _parse(lines, mfdict, flags):
-    try:
-        parsers.parse_manifest(mfdict, flags, lines)
-    except AttributeError:
-        _parsemanifest(mfdict, flags, lines)
-    return mfdict
-
-class manifestdict(dict):
-    def __init__(self, data=''):
-        self._flags = {}
-        _parse(data, self, self._flags)
+    def __init__(self, data):
+        # This init method does a little bit of excessive-looking
+        # precondition checking. This is so that the behavior of this
+        # class exactly matches its C counterpart to try and help
+        # prevent surprise breakage for anyone that develops against
+        # the pure version.
+        if data and data[-1] != '\n':
+            raise ValueError('Manifest did not end in a newline.')
+        dict.__init__(self)
+        prev = None
+        for l in data.splitlines():
+            if prev is not None and prev > l:
+                raise ValueError('Manifest lines not in sorted order.')
+            prev = l
+            f, n = l.split('\0')
+            if len(n) > 40:
+                self[f] = revlog.bin(n[:40]), n[40:]
+            else:
+                self[f] = revlog.bin(n), ''
 
     def __setitem__(self, k, v):
-        assert v is not None
-        dict.__setitem__(self, k, v)
-    def flags(self, f):
-        return self._flags.get(f, "")
-    def setflag(self, f, flags):
-        """Set the flags (symlink, executable) for path f."""
-        self._flags[f] = flags
+        node, flag = v
+        assert node is not None
+        if len(node) > 21:
+            node = node[:21] # match c implementation behavior
+        dict.__setitem__(self, k, (node, flag))
+
+    def __iter__(self):
+        return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
+
     def copy(self):
-        copy = manifestdict()
-        dict.__init__(copy, self)
-        copy._flags = dict.copy(self._flags)
-        return copy
+        c = _lazymanifest('')
+        c.update(self)
+        return c
+
+    def diff(self, m2, clean=False):
+        '''Finds changes between the current manifest and m2.'''
+        diff = {}
+
+        for fn, e1 in self.iteritems():
+            if fn not in m2:
+                diff[fn] = e1, (None, '')
+            else:
+                e2 = m2[fn]
+                if e1 != e2:
+                    diff[fn] = e1, e2
+                elif clean:
+                    diff[fn] = None
+
+        for fn, e2 in m2.iteritems():
+            if fn not in self:
+                diff[fn] = (None, ''), e2
+
+        return diff
+
+    def filtercopy(self, filterfn):
+        c = _lazymanifest('')
+        for f, n, fl in self:
+            if filterfn(f):
+                c[f] = n, fl
+        return c
+
+    def text(self):
+        """Get the full data of this manifest as a bytestring."""
+        fl = sorted(self)
+
+        _hex = revlog.hex
+        # if this is changed to support newlines in filenames,
+        # be sure to check the templates/ dir again (especially *-raw.tmpl)
+        return ''.join("%s\0%s%s\n" % (
+            f, _hex(n[:20]), flag) for f, n, flag in fl)
+
+class manifestdict(object):
+    def __init__(self, data=''):
+        self._lm = _lazymanifest(data)
+
+    def __getitem__(self, key):
+        return self._lm[key][0]
+
+    def __len__(self):
+        return len(self._lm)
+
+    def __setitem__(self, key, node):
+        self._lm[key] = node, self.flags(key, '')
+
+    def __contains__(self, key):
+        return key in self._lm
+
+    def __delitem__(self, key):
+        del self._lm[key]
+
+    def keys(self):
+        return [x[0] for x in self._lm]
+
+    def iterkeys(self):
+        return iter(self.keys())
+
     def intersectfiles(self, files):
-        '''make a new manifestdict with the intersection of self with files
+        '''make a new lazymanifest with the intersection of self with files
 
         The algorithm assumes that files is much smaller than self.'''
         ret = manifestdict()
+        lm = self._lm
         for fn in files:
-            if fn in self:
-                ret[fn] = self[fn]
-                flags = self._flags.get(fn, None)
-                if flags:
-                    ret._flags[fn] = flags
+            if fn in lm:
+                ret._lm[fn] = self._lm[fn]
         return ret
 
     def filesnotin(self, m2):
@@ -74,11 +140,9 @@
             (not match.anypats() and util.all(fn in self for fn in files))):
             return self.intersectfiles(files)
 
-        m = self.copy()
-        for fn in m.keys():
-            if not match(fn):
-                del m[fn]
-        return m
+        lm = manifestdict('')
+        lm._lm = self._lm.filtercopy(match)
+        return lm
 
     def diff(self, m2, clean=False):
         '''Finds changes between the current manifest and m2.
@@ -95,35 +159,36 @@
         the nodeid will be None and the flags will be the empty
         string.
         '''
-        diff = {}
+        return self._lm.diff(m2._lm, clean)
+
+    def setflag(self, key, flag):
+        self._lm[key] = self[key], flag
+
+    def get(self, key, default=None):
+        try:
+            return self._lm[key][0]
+        except KeyError:
+            return default
 
-        for fn, n1 in self.iteritems():
-            fl1 = self._flags.get(fn, '')
-            n2 = m2.get(fn, None)
-            fl2 = m2._flags.get(fn, '')
-            if n2 is None:
-                fl2 = ''
-            if n1 != n2 or fl1 != fl2:
-                diff[fn] = ((n1, fl1), (n2, fl2))
-            elif clean:
-                diff[fn] = None
+    def flags(self, key, default=''):
+        try:
+            return self._lm[key][1]
+        except KeyError:
+            return default
 
-        for fn, n2 in m2.iteritems():
-            if fn not in self:
-                fl2 = m2._flags.get(fn, '')
-                diff[fn] = ((None, ''), (n2, fl2))
+    def __iter__(self):
+        return (x[0] for x in self._lm)
 
-        return diff
+    def copy(self):
+        c = manifestdict('')
+        c._lm = self._lm.copy()
+        return c
+
+    def iteritems(self):
+        return (x[:2] for x in self._lm)
 
     def text(self):
-        """Get the full data of this manifest as a bytestring."""
-        fl = sorted(self)
-        _checkforbidden(fl)
-
-        hex, flags = revlog.hex, self.flags
-        # if this is changed to support newlines in filenames,
-        # be sure to check the templates/ dir again (especially *-raw.tmpl)
-        return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
+        return self._lm.text()
 
     def fastdelta(self, base, changes):
         """Given a base manifest text as an array.array and a list of changes
@@ -143,7 +208,8 @@
             # bs will either be the index of the item or the insert point
             start, end = _msearch(addbuf, f, start)
             if not todelete:
-                l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
+                h, fl = self._lm[f]
+                l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
             else:
                 if start == end:
                     # item we want to delete was not found, error out
@@ -280,12 +346,10 @@
             m = self._mancache[node][0]
             return m.get(f), m.flags(f)
         text = self.revision(node)
-        start, end = _msearch(text, f)
-        if start == end:
+        try:
+            return manifestdict(text)._lm[f]
+        except KeyError:
             return None, None
-        l = text[start:end]
-        f, n = l.split('\0')
-        return revlog.bin(n[:40]), n[40:-1]
 
     def add(self, m, transaction, link, p1, p2, added, removed):
         if p1 in self._mancache: