cleanup: say goodbye to manifestv2 format
authorAugie Fackler <augie@google.com>
Thu, 22 Feb 2018 20:04:42 -0500
changeset 36373 0147a4730420
parent 36372 b8d0761a85c7
child 36374 f0c94af0d70d
cleanup: say goodbye to manifestv2 format This experiment was a bust: we'd hoped for smaller repository sizes, but things got larger. Google ended up rolling out tree manifests in a format that's compatible with the original manifest format, and I believe Facebook is doing the same. This code was never implemented as native speedups, so I'm pretty comfortable saying nobody is using the experimental feature. Let's rip it out. I noticed this code still kicking around because I was investigating a repo corruption issue for timeless. .. bc:: Support for the experimental manifestv2 format has been removed, as it was never completed and failed to meet expectations. Differential Revision: https://phab.mercurial-scm.org/D2393
mercurial/configitems.py
mercurial/help/internals/requirements.txt
mercurial/localrepo.py
mercurial/manifest.py
mercurial/upgrade.py
tests/test-manifest.py
tests/test-manifestv2.t
tests/test-upgrade-repo.t
--- a/mercurial/configitems.py	Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/configitems.py	Thu Feb 22 20:04:42 2018 -0500
@@ -538,9 +538,6 @@
 coreconfigitem('experimental', 'httppostargs',
     default=False,
 )
-coreconfigitem('experimental', 'manifestv2',
-    default=False,
-)
 coreconfigitem('experimental', 'mergedriver',
     default=None,
 )
--- a/mercurial/help/internals/requirements.txt	Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/help/internals/requirements.txt	Thu Feb 22 20:04:42 2018 -0500
@@ -1,4 +1,3 @@
-
 Repositories contain a file (``.hg/requires``) containing a list of
 features/capabilities that are *required* for clients to interface
 with the repository. This file has been present in Mercurial since
@@ -105,8 +104,10 @@
 Denotes that version 2 of manifests are being used.
 
 Support for this requirement was added in Mercurial 3.4 (released
-May 2015). The requirement is currently experimental and is disabled
-by default.
+May 2015). The new format failed to meet expectations and support
+for the format and requirement were removed in Mercurial 4.6
+(released May 2018) since the feature never graduated frome experiment
+status.
 
 treemanifest
 ============
--- a/mercurial/localrepo.py	Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/localrepo.py	Thu Feb 22 20:04:42 2018 -0500
@@ -303,11 +303,15 @@
 
 class localrepository(object):
 
+    # obsolete experimental requirements:
+    #  - manifestv2: An experimental new manifest format that allowed
+    #    for stem compression of long paths. Experiment ended up not
+    #    being successful (repository sizes went up due to worse delta
+    #    chains), and the code was deleted in 4.6.
     supportedformats = {
         'revlogv1',
         'generaldelta',
         'treemanifest',
-        'manifestv2',
         REVLOGV2_REQUIREMENT,
     }
     _basesupported = supportedformats | {
@@ -322,7 +326,6 @@
         'revlogv1',
         'generaldelta',
         'treemanifest',
-        'manifestv2',
     }
 
     # a list of (ui, featureset) functions.
@@ -2261,8 +2264,6 @@
         requirements.add('generaldelta')
     if ui.configbool('experimental', 'treemanifest'):
         requirements.add('treemanifest')
-    if ui.configbool('experimental', 'manifestv2'):
-        requirements.add('manifestv2')
 
     revlogv2 = ui.config('experimental', 'revlogv2')
     if revlogv2 == 'enable-unstable-format-and-corrupt-my-data':
--- a/mercurial/manifest.py	Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/manifest.py	Thu Feb 22 20:04:42 2018 -0500
@@ -9,7 +9,6 @@
 
 import heapq
 import itertools
-import os
 import struct
 
 from .i18n import _
@@ -28,7 +27,7 @@
 parsers = policy.importmod(r'parsers')
 propertycache = util.propertycache
 
-def _parsev1(data):
+def _parse(data):
     # This method does a little bit of excessive-looking
     # precondition checking. This is so that the behavior of this
     # class exactly matches its C counterpart to try and help
@@ -47,43 +46,7 @@
         else:
             yield f, bin(n), ''
 
-def _parsev2(data):
-    metadataend = data.find('\n')
-    # Just ignore metadata for now
-    pos = metadataend + 1
-    prevf = ''
-    while pos < len(data):
-        end = data.find('\n', pos + 1) # +1 to skip stem length byte
-        if end == -1:
-            raise ValueError('Manifest ended with incomplete file entry.')
-        stemlen = ord(data[pos:pos + 1])
-        items = data[pos + 1:end].split('\0')
-        f = prevf[:stemlen] + items[0]
-        if prevf > f:
-            raise ValueError('Manifest entries not in sorted order.')
-        fl = items[1]
-        # Just ignore metadata (items[2:] for now)
-        n = data[end + 1:end + 21]
-        yield f, n, fl
-        pos = end + 22
-        prevf = f
-
-def _parse(data):
-    """Generates (path, node, flags) tuples from a manifest text"""
-    if data.startswith('\0'):
-        return iter(_parsev2(data))
-    else:
-        return iter(_parsev1(data))
-
-def _text(it, usemanifestv2):
-    """Given an iterator over (path, node, flags) tuples, returns a manifest
-    text"""
-    if usemanifestv2:
-        return _textv2(it)
-    else:
-        return _textv1(it)
-
-def _textv1(it):
+def _text(it):
     files = []
     lines = []
     _hex = revlog.hex
@@ -96,19 +59,6 @@
     _checkforbidden(files)
     return ''.join(lines)
 
-def _textv2(it):
-    files = []
-    lines = ['\0\n']
-    prevf = ''
-    for f, n, fl in it:
-        files.append(f)
-        stem = os.path.commonprefix([prevf, f])
-        stemlen = min(len(stem), 255)
-        lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
-        prevf = f
-    _checkforbidden(files)
-    return ''.join(lines)
-
 class lazymanifestiter(object):
     def __init__(self, lm):
         self.pos = 0
@@ -414,13 +364,7 @@
 
 class manifestdict(object):
     def __init__(self, data=''):
-        if data.startswith('\0'):
-            #_lazymanifest can not parse v2
-            self._lm = _lazymanifest('')
-            for f, n, fl in _parsev2(data):
-                self._lm[f] = n, fl
-        else:
-            self._lm = _lazymanifest(data)
+        self._lm = _lazymanifest(data)
 
     def __getitem__(self, key):
         return self._lm[key][0]
@@ -589,12 +533,9 @@
     def iterentries(self):
         return self._lm.iterentries()
 
-    def text(self, usemanifestv2=False):
-        if usemanifestv2:
-            return _textv2(self._lm.iterentries())
-        else:
-            # use (probably) native version for v1
-            return self._lm.text()
+    def text(self):
+        # most likely uses native version
+        return self._lm.text()
 
     def fastdelta(self, base, changes):
         """Given a base manifest text as a bytearray and a list of changes
@@ -1138,12 +1079,12 @@
                 if fl:
                     self._flags[f] = fl
 
-    def text(self, usemanifestv2=False):
+    def text(self):
         """Get the full data of this manifest as a bytestring."""
         self._load()
-        return _text(self.iterentries(), usemanifestv2)
+        return _text(self.iterentries())
 
-    def dirtext(self, usemanifestv2=False):
+    def dirtext(self):
         """Get the full data of this directory as a bytestring. Make sure that
         any submanifests have been written first, so their nodeids are correct.
         """
@@ -1151,7 +1092,7 @@
         flags = self.flags
         dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
         files = [(f, self._files[f], flags(f)) for f in self._files]
-        return _text(sorted(dirs + files), usemanifestv2)
+        return _text(sorted(dirs + files))
 
     def read(self, gettext, readsubtree):
         def _load_for_read(s):
@@ -1208,15 +1149,12 @@
         # stacks of commits, the number can go up, hence the config knob below.
         cachesize = 4
         optiontreemanifest = False
-        usemanifestv2 = False
         opts = getattr(opener, 'options', None)
         if opts is not None:
             cachesize = opts.get('manifestcachesize', cachesize)
             optiontreemanifest = opts.get('treemanifest', False)
-            usemanifestv2 = opts.get('manifestv2', usemanifestv2)
 
         self._treeondisk = optiontreemanifest or treemanifest
-        self._usemanifestv2 = usemanifestv2
 
         self._fulltextcache = util.lrucachedict(cachesize)
 
@@ -1262,8 +1200,7 @@
         return self._dirlogcache[d]
 
     def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
-        if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
-            and not self._usemanifestv2):
+        if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
             # If our first parent is in the manifest cache, we can
             # compute a delta here using properties we know about the
             # manifest up-front, which may save time later for the
@@ -1290,7 +1227,7 @@
                 n = self._addtree(m, transaction, link, m1, m2, readtree)
                 arraytext = None
             else:
-                text = m.text(self._usemanifestv2)
+                text = m.text()
                 n = self.addrevision(text, transaction, link, p1, p2)
                 arraytext = bytearray(text)
 
@@ -1309,13 +1246,13 @@
             sublog.add(subm, transaction, link, subp1, subp2, None, None,
                        readtree=readtree)
         m.writesubtrees(m1, m2, writesubtree)
-        text = m.dirtext(self._usemanifestv2)
+        text = m.dirtext()
         n = None
         if self._dir != '':
             # Double-check whether contents are unchanged to one parent
-            if text == m1.dirtext(self._usemanifestv2):
+            if text == m1.dirtext():
                 n = m1.node()
-            elif text == m2.dirtext(self._usemanifestv2):
+            elif text == m2.dirtext():
                 n = m2.node()
 
         if not n:
@@ -1493,19 +1430,6 @@
         Changing the value of `shallow` has no effect on flat manifests.
         '''
         revlog = self._revlog()
-        if revlog._usemanifestv2:
-            # Need to perform a slow delta
-            r0 = revlog.deltaparent(revlog.rev(self._node))
-            m0 = self._manifestlog[revlog.node(r0)].read()
-            m1 = self.read()
-            md = manifestdict()
-            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
-                if n1:
-                    md[f] = n1
-                    if fl1:
-                        md.setflag(f, fl1)
-            return md
-
         r = revlog.rev(self._node)
         d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
         return manifestdict(d)
@@ -1608,7 +1532,7 @@
         its 't' flag.
         '''
         revlog = self._revlog()
-        if shallow and not revlog._usemanifestv2:
+        if shallow:
             r = revlog.rev(self._node)
             d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
             return manifestdict(d)
--- a/mercurial/upgrade.py	Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/upgrade.py	Thu Feb 22 20:04:42 2018 -0500
@@ -46,7 +46,6 @@
     return {
         # The upgrade code does not yet support these experimental features.
         # This is an artificial limitation.
-        'manifestv2',
         'treemanifest',
         # This was a precursor to generaldelta and was never enabled by default.
         # It should (hopefully) not exist in the wild.
--- a/tests/test-manifest.py	Wed Feb 21 16:47:39 2018 -0800
+++ b/tests/test-manifest.py	Thu Feb 22 20:04:42 2018 -0500
@@ -11,7 +11,6 @@
 )
 
 EMTPY_MANIFEST = b''
-EMTPY_MANIFEST_V2 = b'\0\n'
 
 HASH_1 = b'1' * 40
 BIN_HASH_1 = binascii.unhexlify(HASH_1)
@@ -28,42 +27,6 @@
          b'flag2': b'l',
          }
 
-# Same data as A_SHORT_MANIFEST
-A_SHORT_MANIFEST_V2 = (
-    b'\0\n'
-    b'\x00bar/baz/qux.py\0%(flag2)s\n%(hash2)s\n'
-    b'\x00foo\0%(flag1)s\n%(hash1)s\n'
-    ) % {b'hash1': BIN_HASH_1,
-         b'flag1': b'',
-         b'hash2': BIN_HASH_2,
-         b'flag2': b'l',
-         }
-
-# Same data as A_SHORT_MANIFEST
-A_METADATA_MANIFEST = (
-    b'\0foo\0bar\n'
-    b'\x00bar/baz/qux.py\0%(flag2)s\0foo\0bar\n%(hash2)s\n' # flag and metadata
-    b'\x00foo\0%(flag1)s\0foo\n%(hash1)s\n' # no flag, but metadata
-    ) % {b'hash1': BIN_HASH_1,
-         b'flag1': b'',
-         b'hash2': BIN_HASH_2,
-         b'flag2': b'l',
-         }
-
-A_STEM_COMPRESSED_MANIFEST = (
-    b'\0\n'
-    b'\x00bar/baz/qux.py\0%(flag2)s\n%(hash2)s\n'
-    b'\x04qux/foo.py\0%(flag1)s\n%(hash1)s\n' # simple case of 4 stem chars
-    b'\x0az.py\0%(flag1)s\n%(hash1)s\n' # tricky newline = 10 stem characters
-    b'\x00%(verylongdir)sx/x\0\n%(hash1)s\n'
-    b'\xffx/y\0\n%(hash2)s\n' # more than 255 stem chars
-    ) % {b'hash1': BIN_HASH_1,
-         b'flag1': b'',
-         b'hash2': BIN_HASH_2,
-         b'flag2': b'l',
-         b'verylongdir': 255 * b'x',
-         }
-
 A_DEEPER_MANIFEST = (
     b'a/b/c/bar.py\0%(hash3)s%(flag1)s\n'
     b'a/b/c/bar.txt\0%(hash1)s%(flag1)s\n'
@@ -111,11 +74,6 @@
         self.assertEqual(0, len(m))
         self.assertEqual([], list(m))
 
-    def testEmptyManifestv2(self):
-        m = self.parsemanifest(EMTPY_MANIFEST_V2)
-        self.assertEqual(0, len(m))
-        self.assertEqual([], list(m))
-
     def testManifest(self):
         m = self.parsemanifest(A_SHORT_MANIFEST)
         self.assertEqual([b'bar/baz/qux.py', b'foo'], list(m))
@@ -126,31 +84,6 @@
         with self.assertRaises(KeyError):
             m[b'wat']
 
-    def testParseManifestV2(self):
-        m1 = self.parsemanifest(A_SHORT_MANIFEST)
-        m2 = self.parsemanifest(A_SHORT_MANIFEST_V2)
-        # Should have same content as A_SHORT_MANIFEST
-        self.assertEqual(m1.text(), m2.text())
-
-    def testParseManifestMetadata(self):
-        # Metadata is for future-proofing and should be accepted but ignored
-        m = self.parsemanifest(A_METADATA_MANIFEST)
-        self.assertEqual(A_SHORT_MANIFEST, m.text())
-
-    def testParseManifestStemCompression(self):
-        m = self.parsemanifest(A_STEM_COMPRESSED_MANIFEST)
-        self.assertIn(b'bar/baz/qux.py', m)
-        self.assertIn(b'bar/qux/foo.py', m)
-        self.assertIn(b'bar/qux/foz.py', m)
-        self.assertIn(256 * b'x' + b'/x', m)
-        self.assertIn(256 * b'x' + b'/y', m)
-        self.assertEqual(A_STEM_COMPRESSED_MANIFEST, m.text(usemanifestv2=True))
-
-    def testTextV2(self):
-        m1 = self.parsemanifest(A_SHORT_MANIFEST)
-        v2text = m1.text(usemanifestv2=True)
-        self.assertEqual(A_SHORT_MANIFEST_V2, v2text)
-
     def testSetItem(self):
         want = BIN_HASH_1
 
--- a/tests/test-manifestv2.t	Wed Feb 21 16:47:39 2018 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-Create repo with old manifest
-
-  $ cat << EOF >> $HGRCPATH
-  > [format]
-  > usegeneraldelta=yes
-  > EOF
-
-  $ hg init existing
-  $ cd existing
-  $ echo footext > foo
-  $ hg add foo
-  $ hg commit -m initial
-
-We're using v1, so no manifestv2 entry is in requires yet.
-
-  $ grep manifestv2 .hg/requires
-  [1]
-
-Let's clone this with manifestv2 enabled to switch to the new format for
-future commits.
-
-  $ cd ..
-  $ hg clone --pull existing new --config experimental.manifestv2=1
-  requesting all changes
-  adding changesets
-  adding manifests
-  adding file changes
-  added 1 changesets with 1 changes to 1 files
-  new changesets 0fc9a4fafa44
-  updating to branch default
-  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
-  $ cd new
-
-Check that entry was added to .hg/requires.
-
-  $ grep manifestv2 .hg/requires
-  manifestv2
-
-Make a new commit.
-
-  $ echo newfootext > foo
-  $ hg commit -m new
-
-Check that the manifest actually switched to v2.
-
-  $ hg debugdata -m 0
-  foo\x0021e958b1dca695a60ee2e9cf151753204ee0f9e9 (esc)
-
-  $ hg debugdata -m 1
-  \x00 (esc)
-  \x00foo\x00 (esc)
-  I\xab\x7f\xb8(\x83\xcas\x15\x9d\xc2\xd3\xd3:5\x08\xbad5_ (esc)
-
-Check that manifestv2 is used if the requirement is present, even if it's
-disabled in the config.
-
-  $ echo newerfootext > foo
-  $ hg --config experimental.manifestv2=False commit -m newer
-
-  $ hg debugdata -m 2
-  \x00 (esc)
-  \x00foo\x00 (esc)
-  \xa6\xb1\xfb\xef]\x91\xa1\x19`\xf3.#\x90S\xf8\x06 \xe2\x19\x00 (esc)
-
-Check that we can still read v1 manifests.
-
-  $ hg files -r 0
-  foo
-
-  $ cd ..
-
-Check that entry is added to .hg/requires on repo creation
-
-  $ hg --config experimental.manifestv2=True init repo
-  $ cd repo
-  $ grep manifestv2 .hg/requires
-  manifestv2
-
-Set up simple repo
-
-  $ echo a > file1
-  $ echo b > file2
-  $ echo c > file3
-  $ hg ci -Aqm 'initial'
-  $ echo d > file2
-  $ hg ci -m 'modify file2'
-
-Check that 'hg verify', which uses manifest.readdelta(), works
-
-  $ hg verify
-  checking changesets
-  checking manifests
-  crosschecking files in changesets and manifests
-  checking files
-  3 files, 2 changesets, 4 total revisions
-
-Check that manifest revlog is smaller than for v1
-
-  $ hg debugindex -m
-     rev    offset  length  delta linkrev nodeid       p1           p2
-       0         0      81     -1       0 57361477c778 000000000000 000000000000
-       1        81      33      0       1 aeaab5a2ef74 57361477c778 000000000000
--- a/tests/test-upgrade-repo.t	Wed Feb 21 16:47:39 2018 -0800
+++ b/tests/test-upgrade-repo.t	Thu Feb 22 20:04:42 2018 -0500
@@ -31,23 +31,18 @@
   abort: cannot upgrade repository; unsupported source requirement: shared
   [255]
 
-Do not yet support upgrading manifestv2 and treemanifest repos
-
-  $ hg --config experimental.manifestv2=true init manifestv2
-  $ hg -R manifestv2 debugupgraderepo
-  abort: cannot upgrade repository; unsupported source requirement: manifestv2
-  [255]
+Do not yet support upgrading treemanifest repos
 
   $ hg --config experimental.treemanifest=true init treemanifest
   $ hg -R treemanifest debugupgraderepo
   abort: cannot upgrade repository; unsupported source requirement: treemanifest
   [255]
 
-Cannot add manifestv2 or treemanifest requirement during upgrade
+Cannot add treemanifest requirement during upgrade
 
   $ hg init disallowaddedreq
-  $ hg -R disallowaddedreq --config experimental.manifestv2=true --config experimental.treemanifest=true debugupgraderepo
-  abort: cannot upgrade repository; do not support adding requirement: manifestv2, treemanifest
+  $ hg -R disallowaddedreq --config experimental.treemanifest=true debugupgraderepo
+  abort: cannot upgrade repository; do not support adding requirement: treemanifest
   [255]
 
 An upgrade of a repository created with recommended settings only suggests optimizations