store: use a StoreEntry object instead of tuple for store files
author Pierre-Yves David <pierre-yves.david@octobus.net>
Mon, 15 May 2023 08:56:23 +0200
changeset 50471 521fec115dad
parent 50470 814f55775b21
child 50472 9fdc28e21b68
store: use a StoreEntry object instead of tuple for store files

We want to make the store return more semantic information instead of a stream of file paths. To achieve this, we start by adding a simple object that holds the same information as the tuple it replaces, and do a simple update to the user code to fetch and use the same information. From there, we will be able to iteratively upgrade the codebase toward better objects.
hgext/largefiles/lfutil.py
hgext/largefiles/reposetup.py
hgext/narrow/narrowcommands.py
hgext/remotefilelog/remotefilelogserver.py
mercurial/repair.py
mercurial/revlogutils/rewrite.py
mercurial/store.py
mercurial/streamclone.py
mercurial/upgrade_utils/engine.py
mercurial/verify.py
--- a/hgext/largefiles/lfutil.py	Mon May 15 08:56:08 2023 +0200
+++ b/hgext/largefiles/lfutil.py	Mon May 15 08:56:23 2023 +0200
@@ -552,7 +552,8 @@
 def islfilesrepo(repo):
     '''Return true if the repo is a largefile repo.'''
     if b'largefiles' in repo.requirements and any(
-        shortnameslash in f[1] for f in repo.store.datafiles()
+        shortnameslash in entry.unencoded_path
+        for entry in repo.store.datafiles()
     ):
         return True
 
--- a/hgext/largefiles/reposetup.py	Mon May 15 08:56:08 2023 +0200
+++ b/hgext/largefiles/reposetup.py	Mon May 15 08:56:23 2023 +0200
@@ -458,7 +458,8 @@
     def checkrequireslfiles(ui, repo, **kwargs):
         with repo.lock():
             if b'largefiles' not in repo.requirements and any(
-                lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
+                lfutil.shortname + b'/' in entry.unencoded_path
+                for entry in repo.store.datafiles()
             ):
                 repo.requirements.add(b'largefiles')
                 scmutil.writereporequirements(repo)
--- a/hgext/narrow/narrowcommands.py	Mon May 15 08:56:08 2023 +0200
+++ b/hgext/narrow/narrowcommands.py	Mon May 15 08:56:23 2023 +0200
@@ -288,7 +288,8 @@
                 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
 
         todelete = []
-        for t, f, size in repo.store.datafiles():
+        for entry in repo.store.datafiles():
+            f = entry.unencoded_path
             if f.startswith(b'data/'):
                 file = f[5:-2]
                 if not newmatch(file):
--- a/hgext/remotefilelog/remotefilelogserver.py	Mon May 15 08:56:08 2023 +0200
+++ b/hgext/remotefilelog/remotefilelogserver.py	Mon May 15 08:56:23 2023 +0200
@@ -162,25 +162,34 @@
                             ):
                                 n = util.pconvert(fp[striplen:])
                                 d = store.decodedir(n)
-                                t = store.FILETYPE_OTHER
-                                yield (t, d, st.st_size)
+                                yield store.StoreEntry(
+                                    unencoded_path=d,
+                                    is_revlog=True,
+                                    revlog_type=None,
+                                    is_revlog_main=False,
+                                    is_volatile=False,
+                                    file_size=st.st_size,
+                                )
+
                         if kind == stat.S_IFDIR:
                             visit.append(fp)
 
             if scmutil.istreemanifest(repo):
-                for (t, u, s) in repo.store.datafiles():
+                for entry in repo.store.datafiles():
+                    u = entry.unencoded_path
                     if u.startswith(b'meta/') and (
                         u.endswith(b'.i') or u.endswith(b'.d')
                     ):
-                        yield (t, u, s)
+                        yield entry
 
             # Return .d and .i files that do not match the shallow pattern
             match = state.match
             if match and not match.always():
-                for (t, u, s) in repo.store.datafiles():
+                for entry in repo.store.datafiles():
+                    u = entry.unencoded_path
                     f = u[5:-2]  # trim data/...  and .i/.d
                     if not state.match(f):
-                        yield (t, u, s)
+                        yield entry
 
             for x in repo.store.topfiles():
                 if state.noflatmf and x[1][:11] == b'00manifest.':
--- a/mercurial/repair.py	Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/repair.py	Mon May 15 08:56:23 2023 +0200
@@ -444,7 +444,9 @@
     if scmutil.istreemanifest(repo):
         # This logic is safe if treemanifest isn't enabled, but also
         # pointless, so we skip it if treemanifest isn't enabled.
-        for t, unencoded, size in repo.store.datafiles():
+        for entry in repo.store.datafiles():
+            unencoded = entry.unencoded_path
+            # XXX use the entry.revlog_type instead
             if unencoded.startswith(b'meta/') and unencoded.endswith(
                 b'00manifest.i'
             ):
--- a/mercurial/revlogutils/rewrite.py	Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/revlogutils/rewrite.py	Mon May 15 08:56:23 2023 +0200
@@ -825,9 +825,13 @@
 
     with context():
         files = list(
-            (file_type, path)
-            for (file_type, path, _s) in repo.store.datafiles()
-            if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
+            entry
+            for entry in repo.store.datafiles()
+            if (
+                entry.unencoded_path.endswith(b'.i')
+                and entry.is_revlog
+                and entry.revlog_type == store.FILEFLAGS_FILELOG
+            )
         )
 
         progress = ui.makeprogress(
@@ -837,7 +841,8 @@
         )
         found_nothing = True
 
-        for file_type, path in files:
+        for entry in files:
+            path = entry.unencoded_path
             progress.increment()
             filename = _get_filename_from_filelog_index(path)
             fl = _filelog_from_filename(repo, filename)
--- a/mercurial/store.py	Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/store.py	Mon May 15 08:56:23 2023 +0200
@@ -10,9 +10,11 @@
 import os
 import re
 import stat
+from typing import Generator
 
 from .i18n import _
 from .pycompat import getattr
+from .thirdparty import attr
 from .node import hex
 from . import (
     changelog,
@@ -451,6 +453,20 @@
 FILETYPE_OTHER = FILEFLAGS_OTHER
 
 
+@attr.s(slots=True)
+class StoreEntry:
+    """An entry in the store
+
+    This is returned by `store.walk` and represent some data in the store."""
+
+    unencoded_path = attr.ib()
+    is_revlog = attr.ib(default=False)
+    revlog_type = attr.ib(default=None)
+    is_revlog_main = attr.ib(default=None)
+    is_volatile = attr.ib(default=False)
+    file_size = attr.ib(default=None)
+
+
 class basicstore:
     '''base class for local repository stores'''
 
@@ -500,7 +516,9 @@
         rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
 
-    def datafiles(self, matcher=None, undecodable=None):
+    def datafiles(
+        self, matcher=None, undecodable=None
+    ) -> Generator[StoreEntry, None, None]:
         """Like walk, but excluding the changelog and root manifest.
 
         When [undecodable] is None, revlogs names that can't be
@@ -510,20 +528,35 @@
         files = self._walk(b'data', True) + self._walk(b'meta', True)
         for (t, u, s) in files:
             if t is not None:
-                yield (FILEFLAGS_FILELOG | t, u, s)
+                yield StoreEntry(
+                    unencoded_path=u,
+                    is_revlog=True,
+                    revlog_type=FILEFLAGS_FILELOG,
+                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                    file_size=s,
+                )
 
-    def topfiles(self):
+    def topfiles(self) -> Generator[StoreEntry, None, None]:
         # yield manifest before changelog
         files = reversed(self._walk(b'', False))
         for (t, u, s) in files:
             if u.startswith(b'00changelog'):
-                yield (FILEFLAGS_CHANGELOG | t, u, s)
+                revlog_type = FILEFLAGS_CHANGELOG
             elif u.startswith(b'00manifest'):
-                yield (FILEFLAGS_MANIFESTLOG | t, u, s)
+                revlog_type = FILEFLAGS_MANIFESTLOG
             else:
-                yield (FILETYPE_OTHER | t, u, s)
+                revlog_type = None
+            yield StoreEntry(
+                unencoded_path=u,
+                is_revlog=revlog_type is not None,
+                revlog_type=revlog_type,
+                is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+                is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                file_size=s,
+            )
 
-    def walk(self, matcher=None):
+    def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
         """return files related to data storage (ie: revlogs)
 
         yields (file_type, unencoded, size)
@@ -576,9 +609,12 @@
     # However that might change so we should probably add a test and encoding
     # decoding for it too. see issue6548
 
-    def datafiles(self, matcher=None, undecodable=None):
-        for t, f1, size in super(encodedstore, self).datafiles():
+    def datafiles(
+        self, matcher=None, undecodable=None
+    ) -> Generator[StoreEntry, None, None]:
+        for entry in super(encodedstore, self).datafiles():
             try:
+                f1 = entry.unencoded_path
                 f2 = decodefilename(f1)
             except KeyError:
                 if undecodable is None:
@@ -589,7 +625,8 @@
                     continue
             if not _matchtrackedpath(f2, matcher):
                 continue
-            yield t, f2, size
+            entry.unencoded_path = f2
+            yield entry
 
     def join(self, f):
         return self.path + b'/' + encodefilename(f)
@@ -785,7 +822,9 @@
     def getsize(self, path):
         return self.rawvfs.stat(path).st_size
 
-    def datafiles(self, matcher=None, undecodable=None):
+    def datafiles(
+        self, matcher=None, undecodable=None
+    ) -> Generator[StoreEntry, None, None]:
         for f in sorted(self.fncache):
             if not _matchtrackedpath(f, matcher):
                 continue
@@ -799,7 +838,14 @@
                 continue
             t |= FILEFLAGS_FILELOG
             try:
-                yield t, f, self.getsize(ef)
+                yield StoreEntry(
+                    unencoded_path=f,
+                    is_revlog=True,
+                    revlog_type=FILEFLAGS_FILELOG,
+                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                    file_size=self.getsize(ef),
+                )
             except FileNotFoundError:
                 pass
 
--- a/mercurial/streamclone.py	Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/streamclone.py	Mon May 15 08:56:23 2023 +0200
@@ -269,10 +269,10 @@
     # Get consistent snapshot of repo, lock during scan.
     with repo.lock():
         repo.ui.debug(b'scanning\n')
-        for file_type, name, size in _walkstreamfiles(repo):
-            if size:
-                entries.append((name, size))
-                total_bytes += size
+        for entry in _walkstreamfiles(repo):
+            if entry.file_size:
+                entries.append((entry.unencoded_path, entry.file_size))
+                total_bytes += entry.file_size
         _test_sync_point_walk_1(repo)
     _test_sync_point_walk_2(repo)
 
@@ -677,13 +677,15 @@
     if includes or excludes:
         matcher = narrowspec.match(repo.root, includes, excludes)
 
-    for rl_type, name, size in _walkstreamfiles(repo, matcher):
-        if size:
+    for entry in _walkstreamfiles(repo, matcher):
+        if entry.file_size:
             ft = _fileappend
-            if rl_type & store.FILEFLAGS_VOLATILE:
+            if entry.is_volatile:
                 ft = _filefull
-            entries.append((_srcstore, name, ft, size))
-            totalfilesize += size
+            entries.append(
+                (_srcstore, entry.unencoded_path, ft, entry.file_size)
+            )
+            totalfilesize += entry.file_size
     for name in _walkstreamfullstorefiles(repo):
         if repo.svfs.exists(name):
             totalfilesize += repo.svfs.lstat(name).st_size
--- a/mercurial/upgrade_utils/engine.py	Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/upgrade_utils/engine.py	Mon May 15 08:56:23 2023 +0200
@@ -200,9 +200,10 @@
 
     # Perform a pass to collect metadata. This validates we can open all
     # source files and allows a unified progress bar to be displayed.
-    for rl_type, unencoded, size in alldatafiles:
-        if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
+    for entry in alldatafiles:
+        if not (entry.is_revlog and entry.is_revlog_main):
             continue
+        unencoded = entry.unencoded_path
 
         # the store.walk function will wrongly pickup transaction backup and
         # get confused. As a quick fix for 5.9 release, we ignore those.
@@ -215,7 +216,7 @@
         if unencoded in skip_undo:
             continue
 
-        rl = _revlogfrompath(srcrepo, rl_type, unencoded)
+        rl = _revlogfrompath(srcrepo, entry.revlog_type, unencoded)
 
         info = rl.storageinfo(
             exclusivefiles=True,
@@ -232,19 +233,19 @@
         srcrawsize += rawsize
 
         # This is for the separate progress bars.
-        if rl_type & store.FILEFLAGS_CHANGELOG:
-            changelogs[unencoded] = rl_type
+        if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
+            changelogs[unencoded] = entry.revlog_type
             crevcount += len(rl)
             csrcsize += datasize
             crawsize += rawsize
-        elif rl_type & store.FILEFLAGS_MANIFESTLOG:
-            manifests[unencoded] = rl_type
+        elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
+            manifests[unencoded] = entry.revlog_type
             mcount += 1
             mrevcount += len(rl)
             msrcsize += datasize
             mrawsize += rawsize
-        elif rl_type & store.FILEFLAGS_FILELOG:
-            filelogs[unencoded] = rl_type
+        elif entry.revlog_type & store.FILEFLAGS_FILELOG:
+            filelogs[unencoded] = entry.revlog_type
             fcount += 1
             frevcount += len(rl)
             fsrcsize += datasize
--- a/mercurial/verify.py	Mon May 15 08:56:08 2023 +0200
+++ b/mercurial/verify.py	Mon May 15 08:56:23 2023 +0200
@@ -407,7 +407,9 @@
             subdirs = set()
             revlogv1 = self.revlogv1
             undecodable = []
-            for t, f, size in repo.store.datafiles(undecodable=undecodable):
+            for entry in repo.store.datafiles(undecodable=undecodable):
+                f = entry.unencoded_path
+                size = entry.file_size
                 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                     storefiles.add(_normpath(f))
                     subdirs.add(os.path.dirname(f))
@@ -472,7 +474,9 @@
 
         storefiles = set()
         undecodable = []
-        for t, f, size in repo.store.datafiles(undecodable=undecodable):
+        for entry in repo.store.datafiles(undecodable=undecodable):
+            size = entry.file_size
+            f = entry.unencoded_path
             if (size > 0 or not revlogv1) and f.startswith(b'data/'):
                 storefiles.add(_normpath(f))
         for f in undecodable: