# HG changeset patch # User Augie Fackler # Date 1425747879 18000 # Node ID 3e5c4af69808eedec2407a4982d2fd8a84ced7af # Parent d71837d06597c3eae26873a1fce04fd89673566c manifest: split manifestdict into high-level and low-level logic The low-level logic type (_lazymanifest) matches the behavior of the C implementation introduced in a5f1bccd. A future patch will use that when available. diff -r d71837d06597 -r 3e5c4af69808 mercurial/manifest.py --- a/mercurial/manifest.py Sat Mar 07 11:43:12 2015 -0500 +++ b/mercurial/manifest.py Sat Mar 07 12:04:39 2015 -0500 @@ -9,53 +9,119 @@ import mdiff, parsers, error, revlog, util import array, struct -# Pure Python fallback -def _parsemanifest(mfdict, fdict, lines): - bin = revlog.bin - for l in lines.splitlines(): - f, n = l.split('\0') - if len(n) > 40: - fdict[f] = n[40:] - mfdict[f] = bin(n[:40]) - else: - mfdict[f] = bin(n) + +class _lazymanifest(dict): + """This is the pure implementation of lazymanifest. + + It has not been optimized *at all* and is not lazy. + """ -def _parse(lines, mfdict, flags): - try: - parsers.parse_manifest(mfdict, flags, lines) - except AttributeError: - _parsemanifest(mfdict, flags, lines) - return mfdict - -class manifestdict(dict): - def __init__(self, data=''): - self._flags = {} - _parse(data, self, self._flags) + def __init__(self, data): + # This init method does a little bit of excessive-looking + # precondition checking. This is so that the behavior of this + # class exactly matches its C counterpart to try and help + # prevent surprise breakage for anyone that develops against + # the pure version. + if data and data[-1] != '\n': + raise ValueError('Manifest did not end in a newline.') + dict.__init__(self) + prev = None + for l in data.splitlines(): + if prev is not None and prev > l: + raise ValueError('Manifest lines not in sorted order.') + prev = l + f, n = l.split('\0') + if len(n) > 40: + self[f] = revlog.bin(n[:40]), n[40:] + else: + self[f] = revlog.bin(n), '' def __setitem__(self, k, v): - assert v is not None - dict.__setitem__(self, k, v) - def flags(self, f): - return self._flags.get(f, "") - def setflag(self, f, flags): - """Set the flags (symlink, executable) for path f.""" - self._flags[f] = flags + node, flag = v + assert node is not None + if len(node) > 21: + node = node[:21] # match c implementation behavior + dict.__setitem__(self, k, (node, flag)) + + def __iter__(self): + return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) + def copy(self): - copy = manifestdict() - dict.__init__(copy, self) - copy._flags = dict.copy(self._flags) - return copy + c = _lazymanifest('') + c.update(self) + return c + + def diff(self, m2, clean=False): + '''Finds changes between the current manifest and m2.''' + diff = {} + + for fn, e1 in self.iteritems(): + if fn not in m2: + diff[fn] = e1, (None, '') + else: + e2 = m2[fn] + if e1 != e2: + diff[fn] = e1, e2 + elif clean: + diff[fn] = None + + for fn, e2 in m2.iteritems(): + if fn not in self: + diff[fn] = (None, ''), e2 + + return diff + + def filtercopy(self, filterfn): + c = _lazymanifest('') + for f, n, fl in self: + if filterfn(f): + c[f] = n, fl + return c + + def text(self): + """Get the full data of this manifest as a bytestring.""" + fl = sorted(self) + + _hex = revlog.hex + # if this is changed to support newlines in filenames, + # be sure to check the templates/ dir again (especially *-raw.tmpl) + return ''.join("%s\0%s%s\n" % ( + f, _hex(n[:20]), flag) for f, n, flag in fl) + +class manifestdict(object): + def __init__(self, data=''): + self._lm = _lazymanifest(data) + + def __getitem__(self, key): + return self._lm[key][0] + + def __len__(self): + return len(self._lm) + + def __setitem__(self, key, node): + self._lm[key] = node, self.flags(key, '') + + def __contains__(self, key): + return key in self._lm + + def __delitem__(self, key): + del self._lm[key] + + def keys(self): + return [x[0] for x in self._lm] + + def iterkeys(self): + return iter(self.keys()) + def intersectfiles(self, files): - '''make a new manifestdict with the intersection of self with files + '''make a new lazymanifest with the intersection of self with files The algorithm assumes that files is much smaller than self.''' ret = manifestdict() + lm = self._lm for fn in files: - if fn in self: - ret[fn] = self[fn] - flags = self._flags.get(fn, None) - if flags: - ret._flags[fn] = flags + if fn in lm: + ret._lm[fn] = self._lm[fn] return ret def filesnotin(self, m2): @@ -74,11 +140,9 @@ (not match.anypats() and util.all(fn in self for fn in files))): return self.intersectfiles(files) - m = self.copy() - for fn in m.keys(): - if not match(fn): - del m[fn] - return m + lm = manifestdict('') + lm._lm = self._lm.filtercopy(match) + return lm def diff(self, m2, clean=False): '''Finds changes between the current manifest and m2. @@ -95,35 +159,36 @@ the nodeid will be None and the flags will be the empty string. ''' - diff = {} + return self._lm.diff(m2._lm, clean) + + def setflag(self, key, flag): + self._lm[key] = self[key], flag + + def get(self, key, default=None): + try: + return self._lm[key][0] + except KeyError: + return default - for fn, n1 in self.iteritems(): - fl1 = self._flags.get(fn, '') - n2 = m2.get(fn, None) - fl2 = m2._flags.get(fn, '') - if n2 is None: - fl2 = '' - if n1 != n2 or fl1 != fl2: - diff[fn] = ((n1, fl1), (n2, fl2)) - elif clean: - diff[fn] = None + def flags(self, key, default=''): + try: + return self._lm[key][1] + except KeyError: + return default - for fn, n2 in m2.iteritems(): - if fn not in self: - fl2 = m2._flags.get(fn, '') - diff[fn] = ((None, ''), (n2, fl2)) + def __iter__(self): + return (x[0] for x in self._lm) - return diff + def copy(self): + c = manifestdict('') + c._lm = self._lm.copy() + return c + + def iteritems(self): + return (x[:2] for x in self._lm) def text(self): - """Get the full data of this manifest as a bytestring.""" - fl = sorted(self) - _checkforbidden(fl) - - hex, flags = revlog.hex, self.flags - # if this is changed to support newlines in filenames, - # be sure to check the templates/ dir again (especially *-raw.tmpl) - return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl) + return self._lm.text() def fastdelta(self, base, changes): """Given a base manifest text as an array.array and a list of changes @@ -143,7 +208,8 @@ # bs will either be the index of the item or the insert point start, end = _msearch(addbuf, f, start) if not todelete: - l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f)) + h, fl = self._lm[f] + l = "%s\0%s%s\n" % (f, revlog.hex(h), fl) else: if start == end: # item we want to delete was not found, error out @@ -280,12 +346,10 @@ m = self._mancache[node][0] return m.get(f), m.flags(f) text = self.revision(node) - start, end = _msearch(text, f) - if start == end: + try: + return manifestdict(text)._lm[f] + except KeyError: return None, None - l = text[start:end] - f, n = l.split('\0') - return revlog.bin(n[:40]), n[40:-1] def add(self, m, transaction, link, p1, p2, added, removed): if p1 in self._mancache: diff -r d71837d06597 -r 3e5c4af69808 tests/test-manifest.py --- a/tests/test-manifest.py Sat Mar 07 11:43:12 2015 -0500 +++ b/tests/test-manifest.py Sat Mar 07 12:04:39 2015 -0500 @@ -4,7 +4,7 @@ import silenttestrunner -from mercurial import parsers +from mercurial import manifest as manifestmod HASH_1 = '1' * 40 HASH_2 = 'f' * 40 @@ -38,12 +38,12 @@ self.assert_(thing in container, msg) def testEmptyManifest(self): - m = parsers.lazymanifest('') + m = manifestmod._lazymanifest('') self.assertEqual(0, len(m)) self.assertEqual([], list(m)) def testManifest(self): - m = parsers.lazymanifest(A_SHORT_MANIFEST) + m = manifestmod._lazymanifest(A_SHORT_MANIFEST) want = [ ('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'), ('foo', binascii.unhexlify(HASH_1), ''), @@ -58,13 +58,13 @@ def testSetItem(self): want = binascii.unhexlify(HASH_1), '' - m = parsers.lazymanifest('') + m = manifestmod._lazymanifest('') m['a'] = want self.assertIn('a', m) self.assertEqual(want, m['a']) self.assertEqual('a\0' + HASH_1 + '\n', m.text()) - m = parsers.lazymanifest(A_SHORT_MANIFEST) + m = manifestmod._lazymanifest(A_SHORT_MANIFEST) m['a'] = want self.assertEqual(want, m['a']) self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST, @@ -76,7 +76,7 @@ def testCompaction(self): unhex = binascii.unhexlify h1, h2 = unhex(HASH_1), unhex(HASH_2) - m = parsers.lazymanifest(A_SHORT_MANIFEST) + m = manifestmod._lazymanifest(A_SHORT_MANIFEST) m['alpha'] = h1, '' m['beta'] = h2, '' del m['foo'] @@ -91,8 +91,8 @@ self.assertEqual(w, list(m)) def testSetGetNodeSuffix(self): - clean = parsers.lazymanifest(A_SHORT_MANIFEST) - m = parsers.lazymanifest(A_SHORT_MANIFEST) + clean = manifestmod._lazymanifest(A_SHORT_MANIFEST) + m = manifestmod._lazymanifest(A_SHORT_MANIFEST) h, f = m['foo'] want = h + 'a', f # Merge code wants to set 21-byte fake hashes at times @@ -120,7 +120,7 @@ self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m)) def testFilterCopyException(self): - m = parsers.lazymanifest(A_SHORT_MANIFEST) + m = manifestmod._lazymanifest(A_SHORT_MANIFEST) def filt(path): if path == 'foo': assert False @@ -128,7 +128,7 @@ self.assertRaises(AssertionError, m.filtercopy, filt) def testRemoveItem(self): - m = parsers.lazymanifest(A_SHORT_MANIFEST) + m = manifestmod._lazymanifest(A_SHORT_MANIFEST) del m['foo'] self.assertRaises(KeyError, lambda : m['foo']) self.assertEqual(1, len(m)) @@ -138,9 +138,9 @@ MISSING = (None, '') addl = 'z-only-in-left\0' + HASH_1 + '\n' addr = 'z-only-in-right\0' + HASH_2 + 'x\n' - left = parsers.lazymanifest( + left = manifestmod._lazymanifest( A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl) - right = parsers.lazymanifest(A_SHORT_MANIFEST + addr) + right = manifestmod._lazymanifest(A_SHORT_MANIFEST + addr) want = { 'foo': ((binascii.unhexlify(HASH_3), 'x'), (binascii.unhexlify(HASH_1), '')), @@ -154,14 +154,14 @@ 'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')), 'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')), } - self.assertEqual(want, parsers.lazymanifest('').diff(left)) + self.assertEqual(want, manifestmod._lazymanifest('').diff(left)) want = { 'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'), MISSING), 'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING), 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING), } - self.assertEqual(want, left.diff(parsers.lazymanifest(''))) + self.assertEqual(want, left.diff(manifestmod._lazymanifest(''))) copy = right.copy() del copy['z-only-in-right'] del right['foo'] @@ -171,7 +171,7 @@ } self.assertEqual(want, right.diff(copy)) - short = parsers.lazymanifest(A_SHORT_MANIFEST) + short = manifestmod._lazymanifest(A_SHORT_MANIFEST) pruned = short.copy() del pruned['foo'] want = { @@ -192,30 +192,37 @@ backwards = ''.join( l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if l) try: - parsers.lazymanifest(backwards) + manifestmod._lazymanifest(backwards) self.fail('Should have raised ValueError') except ValueError, v: self.assertIn('Manifest lines not in sorted order.', str(v)) def testNoTerminalNewline(self): try: - parsers.lazymanifest(A_SHORT_MANIFEST + 'wat') + manifestmod._lazymanifest(A_SHORT_MANIFEST + 'wat') self.fail('Should have raised ValueError') except ValueError, v: self.assertIn('Manifest did not end in a newline.', str(v)) def testNoNewLineAtAll(self): try: - parsers.lazymanifest('wat') + manifestmod._lazymanifest('wat') self.fail('Should have raised ValueError') except ValueError, v: self.assertIn('Manifest did not end in a newline.', str(v)) def testHugeManifest(self): - m = parsers.lazymanifest(A_HUGE_MANIFEST) + m = manifestmod._lazymanifest(A_HUGE_MANIFEST) self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m)) self.assertEqual(len(m), len(list(m))) + def testIntersectFiles(self): + m = manifestmod.manifestdict(A_HUGE_MANIFEST) + m2 = m.intersectfiles(['file1', 'file200', 'file300']) + w = ('file1\0%sx\n' + 'file200\0%sl\n' + 'file300\0%s\n') % (HASH_2, HASH_1, HASH_1) + self.assertEqual(w, m2.text()) if __name__ == '__main__': silenttestrunner.main(__name__)