# HG changeset patch # User Martin von Zweigbergk # Date 1428992462 25200 # Node ID b5052fc73300faf5c6f2146dee68d8d4f82a0f6c # Parent 252412e24551bdabc55fd0287a781b7adca0da9b treemanifest: store submanifest revlog per directory With this change, when tree manifests are enabled (in .hg/requires), commits will be written with one manifest revlog per directory. The manifest revlogs are stored in .hg/store/meta/$dir/00manifest.[id]. Flat manifests can still be read and interacted with as usual (they are also read into treemanifest instances). The functionality for writing treemanifest as a flat manifest to disk is still left in the code; tests still pass with '_treeinmem=True' hardcoded. Exchange is not yet implemented. diff -r 252412e24551 -r b5052fc73300 mercurial/manifest.py --- a/mercurial/manifest.py Fri May 15 10:29:39 2015 -0500 +++ b/mercurial/manifest.py Mon Apr 13 23:21:02 2015 -0700 @@ -444,11 +444,15 @@ class treemanifest(object): def __init__(self, dir='', text=''): self._dir = dir + self._node = revlog.nullid self._dirs = {} # Using _lazymanifest here is a little slower than plain old dicts self._files = {} self._flags = {} - self.parse(text) + def readsubtree(subdir, subm): + raise AssertionError('treemanifest constructor only accepts ' + 'flat manifests') + self.parse(text, readsubtree) def _subpath(self, path): return self._dir + path @@ -464,7 +468,22 @@ util.all(m._isempty() for m in self._dirs.values()))) def __str__(self): - return '' % self._dir + return ('' % + (self._dir, revlog.hex(self._node))) + + def dir(self): + '''The directory that this tree manifest represents, including a + trailing '/'. Empty string for the repo root directory.''' + return self._dir + + def node(self): + '''This node of this instance. nullid for unsaved instances. Should + be updated when the instance is read or written from a revlog. + ''' + return self._node + + def setnode(self, node): + self._node = node def iteritems(self): for p, n in sorted(self._dirs.items() + self._files.items()): @@ -557,6 +576,7 @@ def setflag(self, f, flags): """Set the flags (symlink, executable) for path f.""" + assert 'd' not in flags dir, subpath = _splittopdir(f) if dir: if dir not in self._dirs: @@ -567,6 +587,7 @@ def copy(self): copy = treemanifest(self._dir) + copy._node = self._node for d in self._dirs: copy._dirs[d] = self._dirs[d].copy() copy._files = dict.copy(self._files) @@ -737,11 +758,18 @@ _diff(self, m2) return result - def parse(self, text): + def parse(self, text, readsubtree): for f, n, fl in _parse(text): - self[f] = n - if fl: - self.setflag(f, fl) + if fl == 'd': + f = f + '/' + self._dirs[f] = readsubtree(self._subpath(f), n) + else: + # Use __setitem__ and setflag rather than assigning directly + # to _files and _flags, thereby letting us parse flat manifests + # as well as tree manifests. + self[f] = n + if fl: + self.setflag(f, fl) def text(self, usemanifestv2=False): """Get the full data of this manifest as a bytestring.""" @@ -749,8 +777,26 @@ return _text(((f, self[f], flags(f)) for f in self.keys()), usemanifestv2) + def dirtext(self, usemanifestv2=False): + """Get the full data of this directory as a bytestring. Make sure that + any submanifests have been written first, so their nodeids are correct. + """ + flags = self.flags + dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs] + files = [(f, self._files[f], flags(f)) for f in self._files] + return _text(sorted(dirs + files), usemanifestv2) + + def writesubtrees(self, m1, m2, writesubtree): + emptytree = treemanifest() + for d, subm in self._dirs.iteritems(): + subp1 = m1._dirs.get(d, emptytree)._node + subp2 = m2._dirs.get(d, emptytree)._node + if subp1 == revlog.nullid: + subp1, subp2 = subp2, subp1 + writesubtree(subm, subp1, subp2) + class manifest(revlog.revlog): - def __init__(self, opener): + def __init__(self, opener, dir=''): # During normal operations, we expect to deal with not more than four # revs at a time (such as during commit --amend). When rebasing large # stacks of commits, the number can go up, hence the config knob below. @@ -763,14 +809,19 @@ usetreemanifest = opts.get('treemanifest', usetreemanifest) usemanifestv2 = opts.get('manifestv2', usemanifestv2) self._mancache = util.lrucachedict(cachesize) - revlog.revlog.__init__(self, opener, "00manifest.i") self._treeinmem = usetreemanifest self._treeondisk = usetreemanifest self._usemanifestv2 = usemanifestv2 + indexfile = "00manifest.i" + if dir: + assert self._treeondisk + indexfile = "meta/" + dir + "00manifest.i" + revlog.revlog.__init__(self, opener, indexfile) + self._dir = dir def _newmanifest(self, data=''): if self._treeinmem: - return treemanifest('', data) + return treemanifest(self._dir, data) return manifestdict(data) def _slowreaddelta(self, node): @@ -812,8 +863,17 @@ if node in self._mancache: return self._mancache[node][0] text = self.revision(node) - arraytext = array.array('c', text) - m = self._newmanifest(text) + if self._treeondisk: + def readsubtree(dir, subm): + sublog = manifest(self.opener, dir) + return sublog.read(subm) + m = self._newmanifest() + m.parse(text, readsubtree) + m.setnode(node) + arraytext = None + else: + m = self._newmanifest(text) + arraytext = array.array('c', text) self._mancache[node] = (m, arraytext) return m @@ -851,10 +911,34 @@ # just encode a fulltext of the manifest and pass that # through to the revlog layer, and let it handle the delta # process. - text = m.text(self._usemanifestv2) - arraytext = array.array('c', text) - n = self.addrevision(text, transaction, link, p1, p2) + if self._treeondisk: + m1 = self.read(p1) + m2 = self.read(p2) + n = self._addtree(m, transaction, link, m1, m2) + arraytext = None + else: + text = m.text(self._usemanifestv2) + n = self.addrevision(text, transaction, link, p1, p2) + arraytext = array.array('c', text) self._mancache[n] = (m, arraytext) return n + + def _addtree(self, m, transaction, link, m1, m2): + def writesubtree(subm, subp1, subp2): + sublog = manifest(self.opener, subm.dir()) + sublog.add(subm, transaction, link, subp1, subp2, None, None) + m.writesubtrees(m1, m2, writesubtree) + text = m.dirtext(self._usemanifestv2) + # If the manifest is unchanged compared to one parent, + # don't write a new revision + if text == m1.dirtext(self._usemanifestv2): + n = m1.node() + elif text == m2.dirtext(self._usemanifestv2): + n = m2.node() + else: + n = self.addrevision(text, transaction, link, m1.node(), m2.node()) + # Save nodeid so parent manifest can calculate its nodeid + m.setnode(n) + return n diff -r 252412e24551 -r b5052fc73300 mercurial/store.py --- a/mercurial/store.py Fri May 15 10:29:39 2015 -0500 +++ b/mercurial/store.py Mon Apr 13 23:21:02 2015 -0700 @@ -187,7 +187,7 @@ def _hashencode(path, dotencode): digest = _sha(path).hexdigest() - le = lowerencode(path[5:]).split('/') # skips prefix 'data/' + le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/' parts = _auxencode(le, dotencode) basename = parts[-1] _root, ext = os.path.splitext(basename) diff -r 252412e24551 -r b5052fc73300 tests/test-treemanifest.t --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-treemanifest.t Mon Apr 13 23:21:02 2015 -0700 @@ -0,0 +1,278 @@ + +Set up repo + + $ hg --config experimental.treemanifest=True init repo + $ cd repo + +Requirements get set on init + + $ grep treemanifest .hg/requires + treemanifest + +Without directories, looks like any other repo + + $ echo 0 > a + $ echo 0 > b + $ hg ci -Aqm initial + $ hg debugdata -m 0 + a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc) + b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc) + +Submanifest is stored in separate revlog + + $ mkdir dir1 + $ echo 1 > dir1/a + $ echo 1 > dir1/b + $ echo 1 > e + $ hg ci -Aqm 'add dir1' + $ hg debugdata -m 1 + a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc) + b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc) + dir1\x008b3ffd73f901e83304c83d33132c8e774ceac44ed (esc) + e\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc) + $ hg debugdata .hg/store/meta/dir1/00manifest.i 0 + a\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc) + b\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc) + +Can add nested directories + + $ mkdir dir1/dir1 + $ echo 2 > dir1/dir1/a + $ echo 2 > dir1/dir1/b + $ mkdir dir1/dir2 + $ echo 2 > dir1/dir2/a + $ echo 2 > dir1/dir2/b + $ hg ci -Aqm 'add dir1/dir1' + $ hg files -r . + a + b + dir1/a + dir1/b + dir1/dir1/a + dir1/dir1/b + dir1/dir2/a + dir1/dir2/b + e + +Revision is not created for unchanged directory + + $ mkdir dir2 + $ echo 3 > dir2/a + $ hg add dir2 + adding dir2/a + $ hg debugindex .hg/store/meta/dir1/00manifest.i > before + $ hg ci -qm 'add dir2' + $ hg debugindex .hg/store/meta/dir1/00manifest.i > after + $ diff before after + $ rm before after + +Removing directory does not create an revlog entry + + $ hg rm dir1/dir1 + removing dir1/dir1/a + removing dir1/dir1/b + $ hg debugindex .hg/store/meta/dir1/dir1/00manifest.i > before + $ hg ci -qm 'remove dir1/dir1' + $ hg debugindex .hg/store/meta/dir1/dir1/00manifest.i > after + $ diff before after + $ rm before after + +Check that hg files (calls treemanifest.walk()) works + + $ hg co 'desc("add dir2")' + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg files -r . dir1 + dir1/a + dir1/b + dir1/dir1/a + dir1/dir1/b + dir1/dir2/a + dir1/dir2/b + +Check that status between revisions works (calls treemanifest.matches()) + + $ hg status --rev 'desc("add dir1")' --rev . dir1 + A dir1/dir1/a + A dir1/dir1/b + A dir1/dir2/a + A dir1/dir2/b + +Merge creates 2-parent revision of directory revlog + + $ echo 5 > dir1/a + $ hg ci -Aqm 'modify dir1/a' + $ hg co '.^' + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo 6 > dir1/b + $ hg ci -Aqm 'modify dir1/b' + $ hg merge 'desc("modify dir1/a")' + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m 'conflict-free merge involving dir1/' + $ cat dir1/a + 5 + $ cat dir1/b + 6 + $ hg debugindex .hg/store/meta/dir1/00manifest.i + rev offset length base linkrev nodeid p1 p2 + 0 0 54 0 1 8b3ffd73f901 000000000000 000000000000 + 1 54 68 0 2 b66d046c644f 8b3ffd73f901 000000000000 + 2 122 12 0 4 b87265673c8a b66d046c644f 000000000000 + 3 134 95 0 5 aa5d3adcec72 b66d046c644f 000000000000 + 4 229 81 0 6 e29b066b91ad b66d046c644f 000000000000 + 5 310 107 5 7 a120ce2b83f5 e29b066b91ad aa5d3adcec72 + +Merge keeping directory from parent 1 does not create revlog entry. (Note that +dir1's manifest does change, but only because dir1/a's filelog changes.) + + $ hg co 'desc("add dir2")' + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo 8 > dir2/a + $ hg ci -m 'modify dir2/a' + created new head + + $ hg debugindex .hg/store/meta/dir2/00manifest.i > before + $ hg merge 'desc("modify dir1/a")' + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg revert -r 'desc("modify dir2/a")' . + reverting dir1/a (glob) + $ hg ci -m 'merge, keeping parent 1' + $ hg debugindex .hg/store/meta/dir2/00manifest.i > after + $ diff before after + $ rm before after + +Merge keeping directory from parent 2 does not create revlog entry. (Note that +dir2's manifest does change, but only because dir2/a's filelog changes.) + + $ hg co 'desc("modify dir2/a")' + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg debugindex .hg/store/meta/dir1/00manifest.i > before + $ hg merge 'desc("modify dir1/a")' + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg revert -r 'desc("modify dir1/a")' . + reverting dir2/a (glob) + $ hg ci -m 'merge, keeping parent 2' + created new head + $ hg debugindex .hg/store/meta/dir1/00manifest.i > after + $ diff before after + $ rm before after + +Create flat source repo for tests with mixed flat/tree manifests + + $ cd .. + $ hg init repo-flat + $ cd repo-flat + +Create a few commits with flat manifest + + $ echo 0 > a + $ echo 0 > b + $ echo 0 > e + $ for d in dir1 dir1/dir1 dir1/dir2 dir2 + > do + > mkdir $d + > echo 0 > $d/a + > echo 0 > $d/b + > done + $ hg ci -Aqm initial + + $ echo 1 > a + $ echo 1 > dir1/a + $ echo 1 > dir1/dir1/a + $ hg ci -Aqm 'modify on branch 1' + + $ hg co 0 + 3 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo 2 > b + $ echo 2 > dir1/b + $ echo 2 > dir1/dir1/b + $ hg ci -Aqm 'modify on branch 2' + + $ hg merge 1 + 3 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m 'merge of flat manifests to new flat manifest' + +Create clone with tree manifests enabled + + $ cd .. + $ hg clone --pull --config experimental.treemanifest=1 repo-flat repo-mixed + requesting all changes + adding changesets + adding manifests + adding file changes + added 4 changesets with 17 changes to 11 files + updating to branch default + 11 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ cd repo-mixed + $ test -f .hg/store/meta + [1] + $ grep treemanifest .hg/requires + treemanifest + +Commit should store revlog per directory + + $ hg co 1 + 3 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo 3 > a + $ echo 3 > dir1/a + $ echo 3 > dir1/dir1/a + $ hg ci -m 'first tree' + created new head + $ find .hg/store/meta | sort + .hg/store/meta + .hg/store/meta/dir1 + .hg/store/meta/dir1/00manifest.i + .hg/store/meta/dir1/dir1 + .hg/store/meta/dir1/dir1/00manifest.i + .hg/store/meta/dir1/dir2 + .hg/store/meta/dir1/dir2/00manifest.i + .hg/store/meta/dir2 + .hg/store/meta/dir2/00manifest.i + +Merge of two trees + + $ hg co 2 + 6 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ hg merge 1 + 3 files updated, 0 files merged, 0 files removed, 0 files unresolved + (branch merge, don't forget to commit) + $ hg ci -m 'merge of flat manifests to new tree manifest' + created new head + $ hg diff -r 3 + +Parent of tree root manifest should be flat manifest, and two for merge + + $ hg debugindex -m + rev offset length base linkrev nodeid p1 p2 + 0 0 80 0 0 40536115ed9e 000000000000 000000000000 + 1 80 83 0 1 f3376063c255 40536115ed9e 000000000000 + 2 163 103 0 2 5d9b9da231a2 40536115ed9e 000000000000 + 3 266 83 0 3 d17d663cbd8a 5d9b9da231a2 f3376063c255 + 4 349 132 4 4 c05a51345f86 f3376063c255 000000000000 + 5 481 110 4 5 82594b1f557d 5d9b9da231a2 f3376063c255 + + +Status across flat/tree boundary should work + + $ hg status --rev '.^' --rev . + M a + M dir1/a + M dir1/dir1/a + + +Turning off treemanifest config has no effect + + $ hg debugindex .hg/store/meta/dir1/00manifest.i + rev offset length base linkrev nodeid p1 p2 + 0 0 125 0 4 63c9c0557d24 000000000000 000000000000 + 1 125 109 0 5 23d12a1f6e0e 000000000000 000000000000 + $ echo 2 > dir1/a + $ hg --config experimental.treemanifest=False ci -qm 'modify dir1/a' + $ hg debugindex .hg/store/meta/dir1/00manifest.i + rev offset length base linkrev nodeid p1 p2 + 0 0 125 0 4 63c9c0557d24 000000000000 000000000000 + 1 125 109 0 5 23d12a1f6e0e 000000000000 000000000000 + 2 234 55 0 6 3cb2d87b4250 23d12a1f6e0e 000000000000