store: stop relying on a `revlog_type` property
authorPierre-Yves David <pierre-yves.david@octobus.net>
Tue, 30 May 2023 17:43:59 +0100
changeset 50671 e06d1a779eb6
parent 50670 11562d72cb7b
child 50672 3b56395404a1
store: stop relying on a `revlog_type` property We want to know if a file is related to a revlog, but the rest is dealt with differently already, so we simplify things further. as a bonus, this cleanup This provides a small but noticeable speedup. The number below use `hg perf::stream-locked-section` to measure the time spend in the locked section of the streaming clone. Number are run on various repository and compare different steps.: 1) the effect of this patchs, 2) the effect of the cleanup series, 2) current state compared to because large refactoring. All benchmarks are run on linux with Python 3.10.7. ### Effect of this patch # mercurial-2018-08-01-zstd-sparse-revlog # benchmark.name = perf-stream-locked-section before: 0.030246 seconds after: 0.029274 seconds (-3.21%) # pypy-2018-08-01-zstd-sparse-revlog before: 0.545012 seconds after: 0.520872 seconds (-4.43%) # netbeans-2018-08-01-zstd-sparse-revlog before: 2.719939 seconds after: 2.626791 seconds (-3.42%) # mozilla-central-2018-08-01-zstd-sparse-revlog before: 6.304179 seconds after: 6.096700 seconds (-3.29%) # mozilla-try-2019-02-18-zstd-sparse-revlog before: 14.142687 seconds after: 13.640779 seconds (-3.55%) ### Effect of this series A small but sizeable speedup # mercurial-2018-08-01-zstd-sparse-revlog before: 0.031122 seconds after: 0.029274 seconds (-5.94%) # pypy-2018-08-01-zstd-sparse-revlog before: 0.589970 seconds after: 0.520872 seconds (-11.71%) # netbeans-2018-08-01-zstd-sparse-revlog before: 2.980300 seconds after: 2.626791 seconds (-11.86%) # mozilla-central-2018-08-01-zstd-sparse-revlog before: 6.863204 seconds after: 6.096700 seconds (-11.17%) # mozilla-try-2019-02-18-zstd-sparse-revlog before: 14.921393 seconds after: 13.640779 seconds (-8.58%) ### Current state compared to the pre-refactoring state The refactoring introduced multiple string manipulation and dictionary creation that seems to induce a signifiant slowdown Slowdown # mercurial-2018-08-01-zstd-sparse-revlog 6.4.3: 0.019459 seconds after: 0.029274 seconds (+50.44%) ## pypy-2018-08-01-zstd-sparse-revlog 6.4.3: 0.290715 seconds after: 0.520872 seconds (+79.17%) # netbeans-2018-08-01-zstd-sparse-revlog 6.4.3: 1.403447 seconds after: 2.626791 seconds (+87.17%) # mozilla-central-2018-08-01-zstd-sparse-revlog 6.4.3: 3.163549 seconds after: 6.096700 seconds (+92.72%) # mozilla-try-2019-02-18-zstd-sparse-revlog 6.4.3: 6.702184 seconds after: 13.640779 seconds (+103.53%)
mercurial/store.py
mercurial/upgrade_utils/engine.py
--- a/mercurial/store.py	Tue May 30 16:38:13 2023 +0100
+++ b/mercurial/store.py	Tue May 30 17:43:59 2023 +0100
@@ -385,8 +385,8 @@
     b'requires',
 ]
 
-REVLOG_FILES_MAIN_EXT = (b'.i',)
-REVLOG_FILES_OTHER_EXT = (
+REVLOG_FILES_EXT = (
+    b'.i',
     b'.idx',
     b'.d',
     b'.dat',
@@ -415,22 +415,16 @@
 
 def is_revlog(f, kind, st):
     if kind != stat.S_IFREG:
-        return None
-    return revlog_type(f)
+        return False
+    if f.endswith(REVLOG_FILES_EXT):
+        return True
+    return False
 
 
-def revlog_type(f):
-    # XXX we need to filter `undo.` created by the transaction here, however
-    # being naive about it also filter revlog for `undo.*` files, leading to
-    # issue6542. So we no longer use EXCLUDED.
-    if f.endswith(REVLOG_FILES_MAIN_EXT):
-        return FILEFLAGS_REVLOG_MAIN
-    elif f.endswith(REVLOG_FILES_OTHER_EXT):
-        t = FILETYPE_FILELOG_OTHER
-        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
-            t |= FILEFLAGS_VOLATILE
-        return t
-    return None
+def is_revlog_file(f):
+    if f.endswith(REVLOG_FILES_EXT):
+        return True
+    return False
 
 
 # the file is part of changelog data
@@ -758,10 +752,9 @@
                 p = visit.pop()
                 for f, kind, st in readdir(p, stat=True):
                     fp = p + b'/' + f
-                    rl_type = is_revlog(f, kind, st)
-                    if rl_type is not None:
+                    if is_revlog(f, kind, st):
                         n = util.pconvert(fp[striplen:])
-                        l.append((decodedir(n), (rl_type, st.st_size)))
+                        l.append((decodedir(n), st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
 
@@ -794,20 +787,16 @@
         ]
         for base_dir, rl_type, strip_filename in dirs:
             files = self._walk(base_dir, True, undecodable=undecodable)
-            files = (f for f in files if f[1][0] is not None)
             for revlog, details in _gather_revlog(files):
-                file_details = {}
                 revlog_target_id = revlog.split(b'/', 1)[1]
                 if strip_filename and b'/' in revlog:
                     revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                     revlog_target_id += b'/'
-                for ext, (t, size) in sorted(details.items()):
-                    file_details[ext] = size
                 yield RevlogStoreEntry(
                     path_prefix=revlog,
                     revlog_type=rl_type,
                     target_id=revlog_target_id,
-                    details=file_details,
+                    details=details,
                 )
 
     def top_entries(
@@ -831,17 +820,17 @@
         changelogs = collections.defaultdict(dict)
         manifestlogs = collections.defaultdict(dict)
 
-        for u, (t, s) in files:
+        for u, s in files:
             if u.startswith(b'00changelog'):
                 name, ext = _split_revlog_ext(u)
-                changelogs[name][ext] = (t, s)
+                changelogs[name][ext] = s
             elif u.startswith(b'00manifest'):
                 name, ext = _split_revlog_ext(u)
-                manifestlogs[name][ext] = (t, s)
+                manifestlogs[name][ext] = s
             else:
                 yield SimpleStoreEntry(
                     entry_path=u,
-                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                    is_volatile=False,
                     file_size=s,
                 )
         # yield manifest before changelog
@@ -853,14 +842,11 @@
         assert len(changelogs) <= 1
         for data, revlog_type in top_rl:
             for revlog, details in sorted(data.items()):
-                file_details = {}
-                for ext, (t, size) in details.items():
-                    file_details[ext] = size
                 yield RevlogStoreEntry(
                     path_prefix=revlog,
                     revlog_type=revlog_type,
                     target_id=b'',
-                    details=file_details,
+                    details=details,
                 )
 
     def walk(
@@ -1083,7 +1069,7 @@
         if (
             mode not in (b'r', b'rb')
             and (path.startswith(b'data/') or path.startswith(b'meta/'))
-            and revlog_type(path) is not None
+            and is_revlog_file(path)
         ):
             # do not trigger a fncache load when adding a file that already is
             # known to exist.
@@ -1136,14 +1122,12 @@
     def data_entries(
         self, matcher=None, undecodable=None
     ) -> Generator[BaseStoreEntry, None, None]:
-        files = ((f, revlog_type(f)) for f in self.fncache)
         # Note: all files in fncache should be revlog related, However the
         # fncache might contains such file added by previous version of
         # Mercurial.
-        files = (f for f in files if f[1] is not None)
+        files = ((f, None) for f in self.fncache if is_revlog_file(f))
         by_revlog = _gather_revlog(files)
         for revlog, details in by_revlog:
-            file_details = {}
             if revlog.startswith(b'data/'):
                 rl_type = FILEFLAGS_FILELOG
                 revlog_target_id = revlog.split(b'/', 1)[1]
@@ -1155,13 +1139,11 @@
             else:
                 # unreachable
                 assert False, revlog
-            for ext in details:
-                file_details[ext] = None
             entry = RevlogStoreEntry(
                 path_prefix=revlog,
                 revlog_type=rl_type,
                 target_id=revlog_target_id,
-                details=file_details,
+                details=details,
             )
             if _match_tracked_entry(entry, matcher):
                 yield entry
--- a/mercurial/upgrade_utils/engine.py	Tue May 30 16:38:13 2023 +0100
+++ b/mercurial/upgrade_utils/engine.py	Tue May 30 17:43:59 2023 +0100
@@ -370,7 +370,7 @@
     are cloned"""
     for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
         # don't copy revlogs as they are already cloned
-        if store.revlog_type(path) is not None:
+        if store.is_revlog_file(path):
             continue
         # Skip transaction related files.
         if path.startswith(b'undo'):