store: return just one filename in walk functions
authorValentin Gatien-Baron <valentin.gatienbaron@gmail.com>
Mon, 02 Aug 2021 08:05:13 -0400
changeset 47877 2174f54aab18
parent 47876 517a2c1cb788
child 47878 31a72e5e9200
store: return just one filename in walk functions Various walk functions return `(revlog_type, decoded, encoded)` where decoded could be None. But no-one cares about `encoded` and expects `unencoded` to be present, except verify (because this can only happen with old repo formats). Simplify all this by either failing outright if a decoding a filename fails (instead of almost certainly failing with a type error due to treating None as a bytes), or skipping the filename but providing in an out argument for hg verify. Differential Revision: https://phab.mercurial-scm.org/D11248
hgext/narrow/narrowcommands.py
hgext/remotefilelog/contentstore.py
hgext/remotefilelog/remotefilelogserver.py
mercurial/repair.py
mercurial/revlogutils/rewrite.py
mercurial/store.py
mercurial/streamclone.py
mercurial/upgrade_utils/engine.py
mercurial/verify.py
mercurial/wireprotov2server.py
tests/simplestorerepo.py
--- a/hgext/narrow/narrowcommands.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/hgext/narrow/narrowcommands.py	Mon Aug 02 08:05:13 2021 -0400
@@ -289,7 +289,7 @@
                 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
 
         todelete = []
-        for t, f, f2, size in repo.store.datafiles():
+        for t, f, size in repo.store.datafiles():
             if f.startswith(b'data/'):
                 file = f[5:-2]
                 if not newmatch(file):
--- a/hgext/remotefilelog/contentstore.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/hgext/remotefilelog/contentstore.py	Mon Aug 02 08:05:13 2021 -0400
@@ -378,7 +378,7 @@
             ledger.markdataentry(self, treename, node)
             ledger.markhistoryentry(self, treename, node)
 
-        for t, path, encoded, size in self._store.datafiles():
+        for t, path, size in self._store.datafiles():
             if path[:5] != b'meta/' or path[-2:] != b'.i':
                 continue
 
--- a/hgext/remotefilelog/remotefilelogserver.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/hgext/remotefilelog/remotefilelogserver.py	Mon Aug 02 08:05:13 2021 -0400
@@ -166,24 +166,24 @@
                                 n = util.pconvert(fp[striplen:])
                                 d = store.decodedir(n)
                                 t = store.FILETYPE_OTHER
-                                yield (t, d, n, st.st_size)
+                                yield (t, d, st.st_size)
                         if kind == stat.S_IFDIR:
                             visit.append(fp)
 
             if scmutil.istreemanifest(repo):
-                for (t, u, e, s) in repo.store.datafiles():
+                for (t, u, s) in repo.store.datafiles():
                     if u.startswith(b'meta/') and (
                         u.endswith(b'.i') or u.endswith(b'.d')
                     ):
-                        yield (t, u, e, s)
+                        yield (t, u, s)
 
             # Return .d and .i files that do not match the shallow pattern
             match = state.match
             if match and not match.always():
-                for (t, u, e, s) in repo.store.datafiles():
+                for (t, u, s) in repo.store.datafiles():
                     f = u[5:-2]  # trim data/...  and .i/.d
                     if not state.match(f):
-                        yield (t, u, e, s)
+                        yield (t, u, s)
 
             for x in repo.store.topfiles():
                 if state.noflatmf and x[1][:11] == b'00manifest.':
--- a/mercurial/repair.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/repair.py	Mon Aug 02 08:05:13 2021 -0400
@@ -433,7 +433,7 @@
     if scmutil.istreemanifest(repo):
         # This logic is safe if treemanifest isn't enabled, but also
         # pointless, so we skip it if treemanifest isn't enabled.
-        for t, unencoded, encoded, size in repo.store.datafiles():
+        for t, unencoded, size in repo.store.datafiles():
             if unencoded.startswith(b'meta/') and unencoded.endswith(
                 b'00manifest.i'
             ):
--- a/mercurial/revlogutils/rewrite.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/revlogutils/rewrite.py	Mon Aug 02 08:05:13 2021 -0400
@@ -824,7 +824,7 @@
     with context():
         files = list(
             (file_type, path)
-            for (file_type, path, _e, _s) in repo.store.datafiles()
+            for (file_type, path, _s) in repo.store.datafiles()
             if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
         )
 
--- a/mercurial/store.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/store.py	Mon Aug 02 08:05:13 2021 -0400
@@ -472,7 +472,7 @@
         return self.path + b'/' + encodedir(f)
 
     def _walk(self, relpath, recurse):
-        '''yields (unencoded, encoded, size)'''
+        '''yields (revlog_type, unencoded, size)'''
         path = self.path
         if relpath:
             path += b'/' + relpath
@@ -488,7 +488,7 @@
                     rl_type = is_revlog(f, kind, st)
                     if rl_type is not None:
                         n = util.pconvert(fp[striplen:])
-                        l.append((rl_type, decodedir(n), n, st.st_size))
+                        l.append((rl_type, decodedir(n), st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         l.sort()
@@ -505,26 +505,32 @@
         rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
 
-    def datafiles(self, matcher=None):
+    def datafiles(self, matcher=None, undecodable=None):
+        """Like walk, but excluding the changelog and root manifest.
+
+        When [undecodable] is None, revlogs names that can't be
+        decoded cause an exception. When it is provided, it should
+        be a list and the filenames that can't be decoded are added
+        to it instead. This is very rarely needed."""
         files = self._walk(b'data', True) + self._walk(b'meta', True)
-        for (t, u, e, s) in files:
-            yield (FILEFLAGS_FILELOG | t, u, e, s)
+        for (t, u, s) in files:
+            yield (FILEFLAGS_FILELOG | t, u, s)
 
     def topfiles(self):
         # yield manifest before changelog
         files = reversed(self._walk(b'', False))
-        for (t, u, e, s) in files:
+        for (t, u, s) in files:
             if u.startswith(b'00changelog'):
-                yield (FILEFLAGS_CHANGELOG | t, u, e, s)
+                yield (FILEFLAGS_CHANGELOG | t, u, s)
             elif u.startswith(b'00manifest'):
-                yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
+                yield (FILEFLAGS_MANIFESTLOG | t, u, s)
             else:
-                yield (FILETYPE_OTHER | t, u, e, s)
+                yield (FILETYPE_OTHER | t, u, s)
 
     def walk(self, matcher=None):
         """return file related to data storage (ie: revlogs)
 
-        yields (file_type, unencoded, encoded, size)
+        yields (file_type, unencoded, size)
 
         if a matcher is passed, storage files of only those tracked paths
         are passed with matches the matcher
@@ -574,15 +580,20 @@
     # However that might change so we should probably add a test and encoding
     # decoding for it too. see issue6548
 
-    def datafiles(self, matcher=None):
-        for t, a, b, size in super(encodedstore, self).datafiles():
+    def datafiles(self, matcher=None, undecodable=None):
+        for t, f1, size in super(encodedstore, self).datafiles():
             try:
-                a = decodefilename(a)
+                f2 = decodefilename(f1)
             except KeyError:
-                a = None
-            if a is not None and not _matchtrackedpath(a, matcher):
+                if undecodable is None:
+                    msg = _(b'undecodable revlog name %s') % f1
+                    raise error.StorageError(msg)
+                else:
+                    undecodable.append(f1)
+                    continue
+            if not _matchtrackedpath(f2, matcher):
                 continue
-            yield t, a, b, size
+            yield t, f2, size
 
     def join(self, f):
         return self.path + b'/' + encodefilename(f)
@@ -770,7 +781,7 @@
     def getsize(self, path):
         return self.rawvfs.stat(path).st_size
 
-    def datafiles(self, matcher=None):
+    def datafiles(self, matcher=None, undecodable=None):
         for f in sorted(self.fncache):
             if not _matchtrackedpath(f, matcher):
                 continue
@@ -779,7 +790,7 @@
                 t = revlog_type(f)
                 assert t is not None, f
                 t |= FILEFLAGS_FILELOG
-                yield t, f, ef, self.getsize(ef)
+                yield t, f, self.getsize(ef)
             except OSError as err:
                 if err.errno != errno.ENOENT:
                     raise
--- a/mercurial/streamclone.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/streamclone.py	Mon Aug 02 08:05:13 2021 -0400
@@ -248,7 +248,7 @@
     # Get consistent snapshot of repo, lock during scan.
     with repo.lock():
         repo.ui.debug(b'scanning\n')
-        for file_type, name, ename, size in _walkstreamfiles(repo):
+        for file_type, name, size in _walkstreamfiles(repo):
             if size:
                 entries.append((name, size))
                 total_bytes += size
@@ -650,7 +650,7 @@
     if includes or excludes:
         matcher = narrowspec.match(repo.root, includes, excludes)
 
-    for rl_type, name, ename, size in _walkstreamfiles(repo, matcher):
+    for rl_type, name, size in _walkstreamfiles(repo, matcher):
         if size:
             ft = _fileappend
             if rl_type & store.FILEFLAGS_VOLATILE:
--- a/mercurial/upgrade_utils/engine.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/upgrade_utils/engine.py	Mon Aug 02 08:05:13 2021 -0400
@@ -201,7 +201,7 @@
 
     # Perform a pass to collect metadata. This validates we can open all
     # source files and allows a unified progress bar to be displayed.
-    for rl_type, unencoded, encoded, size in alldatafiles:
+    for rl_type, unencoded, size in alldatafiles:
         if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
             continue
 
--- a/mercurial/verify.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/verify.py	Mon Aug 02 08:05:13 2021 -0400
@@ -395,12 +395,13 @@
             storefiles = set()
             subdirs = set()
             revlogv1 = self.revlogv1
-            for t, f, f2, size in repo.store.datafiles():
-                if not f:
-                    self._err(None, _(b"cannot decode filename '%s'") % f2)
-                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
+            undecodable = []
+            for t, f, size in repo.store.datafiles(undecodable=undecodable):
+                if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                     storefiles.add(_normpath(f))
                     subdirs.add(os.path.dirname(f))
+            for f in undecodable:
+                self._err(None, _(b"cannot decode filename '%s'") % f)
             subdirprogress = ui.makeprogress(
                 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
             )
@@ -459,11 +460,12 @@
         ui.status(_(b"checking files\n"))
 
         storefiles = set()
-        for rl_type, f, f2, size in repo.store.datafiles():
-            if not f:
-                self._err(None, _(b"cannot decode filename '%s'") % f2)
-            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
+        undecodable = []
+        for t, f, size in repo.store.datafiles(undecodable=undecodable):
+            if (size > 0 or not revlogv1) and f.startswith(b'data/'):
                 storefiles.add(_normpath(f))
+        for f in undecodable:
+            self._err(None, _(b"cannot decode filename '%s'") % f)
 
         state = {
             # TODO this assumes revlog storage for changelog.
--- a/mercurial/wireprotov2server.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/mercurial/wireprotov2server.py	Mon Aug 02 08:05:13 2021 -0400
@@ -1579,7 +1579,7 @@
 
     # TODO this is a bunch of storage layer interface abstractions because
     # it assumes revlogs.
-    for rl_type, name, encodedname, size in topfiles:
+    for rl_type, name, size in topfiles:
         # XXX use the `rl_type` for that
         if b'changelog' in files and name.startswith(b'00changelog'):
             pass
--- a/tests/simplestorerepo.py	Sun Aug 01 10:57:21 2021 -0400
+++ b/tests/simplestorerepo.py	Mon Aug 02 08:05:13 2021 -0400
@@ -665,20 +665,24 @@
 
 
 class simplestore(store.encodedstore):
-    def datafiles(self):
+    def datafiles(self, undecodable=None):
         for x in super(simplestore, self).datafiles():
             yield x
 
         # Supplement with non-revlog files.
         extrafiles = self._walk('data', True, filefilter=issimplestorefile)
 
-        for unencoded, encoded, size in extrafiles:
+        for f1, size in extrafiles:
             try:
-                unencoded = store.decodefilename(unencoded)
+                f2 = store.decodefilename(f1)
             except KeyError:
-                unencoded = None
+                if undecodable is None:
+                    raise error.StorageError(b'undecodable revlog name %s' % f1)
+                else:
+                    undecodable.append(f1)
+                    continue
 
-            yield unencoded, encoded, size
+            yield f2, size
 
 
 def reposetup(ui, repo):