sidedata: enable sidedata computers to optionally rewrite flags
author Raphaël Gomès <rgomes@octobus.net>
Thu, 08 Apr 2021 16:55:17 +0200
changeset 47078 223b47235d1c
parent 47077 119790e1c67c
child 47082 1680c94741f8
sidedata: enable sidedata computers to optionally rewrite flags

Sidedata computers may want to influence the flags of the revision they touch. For example, the computer for changelog-based copytracing can add a flag to signify that this revision might affect copytracing, and conversely remove said flag if the information is no longer applicable.

See inline documentation in `storageutil` for more details.

Differential Revision: https://phab.mercurial-scm.org/D10344
mercurial/cext/revlog.c
mercurial/interfaces/repository.py
mercurial/localrepo.py
mercurial/metadata.py
mercurial/pure/parsers.py
mercurial/revlog.py
mercurial/utils/storageutil.py
tests/testlib/ext-sidedata-2.py
tests/testlib/ext-sidedata-3.py
tests/testlib/ext-sidedata-5.py
--- a/mercurial/cext/revlog.c	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/cext/revlog.c	Thu Apr 08 16:55:17 2021 +0200
@@ -503,14 +503,14 @@
    inside the transaction that creates the given revision. */
 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
 {
-	uint64_t sidedata_offset;
+	uint64_t offset_flags, sidedata_offset;
 	int rev;
 	Py_ssize_t sidedata_comp_len;
 	char *data;
 #if LONG_MAX == 0x7fffffffL
-	const char *const sidedata_format = PY23("nKi", "nKi");
+	const char *const sidedata_format = PY23("nKiK", "nKiK");
 #else
-	const char *const sidedata_format = PY23("nki", "nki");
+	const char *const sidedata_format = PY23("nkik", "nkik");
 #endif
 
 	if (self->hdrsize == v1_hdrsize || self->inlined) {
@@ -525,7 +525,7 @@
 	}
 
 	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
-	                      &sidedata_comp_len))
+	                      &sidedata_comp_len, &offset_flags))
 		return NULL;
 
 	if (rev < 0 || rev >= index_length(self)) {
@@ -542,6 +542,7 @@
 	/* Find the newly added node, offset from the "already on-disk" length
 	 */
 	data = self->added + self->hdrsize * (rev - self->length);
+	putbe64(offset_flags, data);
 	putbe64(sidedata_offset, data + 64);
 	putbe32(sidedata_comp_len, data + 72);
 
--- a/mercurial/interfaces/repository.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/interfaces/repository.py	Thu Apr 08 16:55:17 2021 +0200
@@ -1856,7 +1856,7 @@
     def savecommitmessage(text):
         pass
 
-    def register_sidedata_computer(kind, category, keys, computer):
+    def register_sidedata_computer(kind, category, keys, computer, flags):
         pass
 
     def register_wanted_sidedata(category):
--- a/mercurial/localrepo.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/localrepo.py	Thu Apr 08 16:55:17 2021 +0200
@@ -3370,9 +3370,9 @@
             return
         self._wanted_sidedata.add(pycompat.bytestr(category))
 
-    def register_sidedata_computer(self, kind, category, keys, computer):
+    def register_sidedata_computer(self, kind, category, keys, computer, flags):
         if kind not in revlogconst.ALL_KINDS:
-            msg = _(b"unexpected revlog kind %r.")
+            msg = _(b"unexpected revlog kind '%s'.")
             raise error.ProgrammingError(msg % kind)
         category = pycompat.bytestr(category)
         if category in self._sidedata_computers.get(kind, []):
@@ -3381,7 +3381,7 @@
             )
             raise error.ProgrammingError(msg % category)
         self._sidedata_computers.setdefault(kind, {})
-        self._sidedata_computers[kind][category] = (keys, computer)
+        self._sidedata_computers[kind][category] = (keys, computer, flags)
 
 
 # used to avoid circular references so destructors work
--- a/mercurial/metadata.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/metadata.py	Thu Apr 08 16:55:17 2021 +0200
@@ -820,7 +820,9 @@
 
 
 def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
-    return _getsidedata(repo, rev)[0]
+    sidedata, has_copies_info = _getsidedata(repo, rev)
+    flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
+    return sidedata, (flags_to_add, 0)
 
 
 def set_sidedata_spec_for_repo(repo):
@@ -831,6 +833,7 @@
         sidedatamod.SD_FILES,
         (sidedatamod.SD_FILES,),
         copies_sidedata_computer,
+        sidedataflag.REVIDX_HASCOPIESINFO,
     )
 
 
--- a/mercurial/pure/parsers.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/pure/parsers.py	Thu Apr 08 16:55:17 2021 +0200
@@ -268,7 +268,9 @@
     index_format = revlog_constants.INDEX_ENTRY_V2
     null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
 
-    def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
+    def replace_sidedata_info(
+        self, i, sidedata_offset, sidedata_length, offset_flags
+    ):
         """
         Replace an existing index entry's sidedata offset and length with new
         ones.
@@ -283,7 +285,8 @@
         if i >= self._lgt:
             packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
             old = self._extra[i - self._lgt]
-            new = old[:64] + packed + old[64 + packed_size :]
+            offset_flags = struct.pack(b">Q", offset_flags)
+            new = offset_flags + old[8:64] + packed + old[64 + packed_size :]
             self._extra[i - self._lgt] = new
         else:
             msg = b"cannot rewrite entries outside of this transaction"
--- a/mercurial/revlog.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/revlog.py	Thu Apr 08 16:55:17 2021 +0200
@@ -3105,7 +3105,7 @@
             current_offset = fp.tell()
             for rev in range(startrev, endrev + 1):
                 entry = self.index[rev]
-                new_sidedata = storageutil.run_sidedata_helpers(
+                new_sidedata, flags = storageutil.run_sidedata_helpers(
                     store=self,
                     sidedata_helpers=helpers,
                     sidedata={},
@@ -3121,7 +3121,11 @@
                     # revlog.
                     msg = b"Rewriting existing sidedata is not supported yet"
                     raise error.Abort(msg)
-                entry = entry[:8]
+
+                # Apply (potential) flags to add and to remove after running
+                # the sidedata helpers
+                new_offset_flags = entry[0] | flags[0] & ~flags[1]
+                entry = (new_offset_flags,) + entry[1:8]
                 entry += (current_offset, len(serialized_sidedata))
 
                 fp.write(serialized_sidedata)
@@ -3131,9 +3135,9 @@
         # rewrite the new index entries
         with self._indexfp(b'w+') as fp:
             fp.seek(startrev * self.index.entry_size)
-            for i, entry in enumerate(new_entries):
+            for i, e in enumerate(new_entries):
                 rev = startrev + i
-                self.index.replace_sidedata_info(rev, entry[8], entry[9])
+                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                 packed = self.index.entry_binary(rev)
                 if rev == 0:
                     header = self.index.pack_header(self.version)
--- a/mercurial/utils/storageutil.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/mercurial/utils/storageutil.py	Thu Apr 08 16:55:17 2021 +0200
@@ -366,12 +366,17 @@
         If not None, means that sidedata should be included.
         A dictionary of revlog type to tuples of `(repo, computers, removers)`:
             * `repo` is used as an argument for computers
-            * `computers` is a list of `(category, (keys, computer)` that
+            * `computers` is a list of `(category, (keys, computer, flags))` that
                compute the missing sidedata categories that were asked:
                * `category` is the sidedata category
                * `keys` are the sidedata keys to be affected
+               * `flags` is a bitmask (an integer) of flags to remove when
+                  removing the category.
                * `computer` is the function `(repo, store, rev, sidedata)` that
-                 returns a new sidedata dict.
+                 returns a tuple of
+                 `(new sidedata dict, (flags to add, flags to remove))`.
+                 For example, it will return `({}, (0, 1 << 15))` to return no
+                 sidedata, with no flags to add and one flag to remove.
             * `removers` will remove the keys corresponding to the categories
               that are present, but not needed.
         If both `computers` and `removers` are empty, sidedata are simply not
@@ -491,12 +496,13 @@
                 available.add(rev)
 
         serialized_sidedata = None
+        sidedata_flags = (0, 0)
         if sidedata_helpers:
-            sidedata = store.sidedata(rev)
-            sidedata = run_sidedata_helpers(
+            old_sidedata = store.sidedata(rev)
+            sidedata, sidedata_flags = run_sidedata_helpers(
                 store=store,
                 sidedata_helpers=sidedata_helpers,
-                sidedata=sidedata,
+                sidedata=old_sidedata,
                 rev=rev,
             )
             if sidedata:
@@ -507,6 +513,8 @@
         if serialized_sidedata:
             # Advertise that sidedata exists to the other side
             protocol_flags |= CG_FLAG_SIDEDATA
+            # Computers and removers can return flags to add and/or remove
+            flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
 
         yield resultcls(
             node=node,
@@ -535,12 +543,17 @@
     """
     repo, sd_computers, sd_removers = sidedata_helpers
     kind = store.revlog_kind
-    for _keys, sd_computer in sd_computers.get(kind, []):
-        sidedata = sd_computer(repo, store, rev, sidedata)
-    for keys, _computer in sd_removers.get(kind, []):
+    flags_to_add = 0
+    flags_to_remove = 0
+    for _keys, sd_computer, _flags in sd_computers.get(kind, []):
+        sidedata, flags = sd_computer(repo, store, rev, sidedata)
+        flags_to_add |= flags[0]
+        flags_to_remove |= flags[1]
+    for keys, _computer, flags in sd_removers.get(kind, []):
         for key in keys:
             sidedata.pop(key, None)
-    return sidedata
+        flags_to_remove |= flags
+    return sidedata, (flags_to_add, flags_to_remove)
 
 
 def deltaiscensored(delta, baserev, baselenfn):
--- a/tests/testlib/ext-sidedata-2.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/tests/testlib/ext-sidedata-2.py	Thu Apr 08 16:55:17 2021 +0200
@@ -16,13 +16,15 @@
 from mercurial.revlogutils import sidedata as sidedatamod
 from mercurial.revlogutils import constants
 
+NO_FLAGS = (0, 0)  # hoot
+
 
 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
     sidedata = sidedata.copy()
     if text is None:
         text = revlog.revision(rev)
     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -31,7 +33,7 @@
         text = revlog.revision(rev)
     sha256 = hashlib.sha256(text).digest()
     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def reposetup(ui, repo):
@@ -42,10 +44,12 @@
             sidedatamod.SD_TEST1,
             (sidedatamod.SD_TEST1,),
             compute_sidedata_1,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST2,
             (sidedatamod.SD_TEST2,),
             compute_sidedata_2,
+            0,
         )
--- a/tests/testlib/ext-sidedata-3.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/tests/testlib/ext-sidedata-3.py	Thu Apr 08 16:55:17 2021 +0200
@@ -22,13 +22,15 @@
 from mercurial.revlogutils import sidedata as sidedatamod
 from mercurial.revlogutils import constants
 
+NO_FLAGS = (0, 0)
+
 
 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
     sidedata = sidedata.copy()
     if text is None:
         text = revlog.revision(rev)
     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -37,7 +39,7 @@
         text = revlog.revision(rev)
     sha256 = hashlib.sha256(text).digest()
     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
@@ -46,7 +48,7 @@
         text = revlog.revision(rev)
     sha384 = hashlib.sha384(text).digest()
     sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def wrapaddrevision(
@@ -55,8 +57,8 @@
     if kwargs.get('sidedata') is None:
         kwargs['sidedata'] = {}
     sd = kwargs['sidedata']
-    sd = compute_sidedata_1(None, self, None, sd, text=text)
-    kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
+    sd, flags = compute_sidedata_1(None, self, None, sd, text=text)
+    kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)[0]
     return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
 
 
@@ -72,18 +74,21 @@
             sidedatamod.SD_TEST1,
             (sidedatamod.SD_TEST1,),
             compute_sidedata_1,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST2,
             (sidedatamod.SD_TEST2,),
             compute_sidedata_2,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST3,
             (sidedatamod.SD_TEST3,),
             compute_sidedata_3,
+            0,
         )
     repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
     repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
--- a/tests/testlib/ext-sidedata-5.py	Sat Apr 10 11:27:40 2021 +0200
+++ b/tests/testlib/ext-sidedata-5.py	Thu Apr 08 16:55:17 2021 +0200
@@ -23,13 +23,15 @@
 from mercurial.revlogutils import sidedata as sidedatamod
 from mercurial.revlogutils import constants
 
+NO_FLAGS = (0, 0)
+
 
 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
     sidedata = sidedata.copy()
     if text is None:
         text = revlog.revision(rev)
     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -38,7 +40,7 @@
         text = revlog.revision(rev)
     sha256 = hashlib.sha256(text).digest()
     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def reposetup(ui, repo):
@@ -49,12 +51,14 @@
             sidedatamod.SD_TEST1,
             (sidedatamod.SD_TEST1,),
             compute_sidedata_1,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST2,
             (sidedatamod.SD_TEST2,),
             compute_sidedata_2,
+            0,
         )
 
     # We don't register sidedata computers because we don't care within these