commit: use `dirstate.change_files` to scope the associated `addremove`
authorPierre-Yves David <pierre-yves.david@octobus.net>
Wed, 15 Feb 2023 11:51:58 +0100
changeset 50029 28dfb2df4ab9
parent 50028 a46dfc2b58a3
child 50030 5deac4eaa7d1
commit: use `dirstate.change_files` to scope the associated `addremove` This was significantly more complicated than I expected, because multiple extensions get in the way. I introduced a context that lazily open the transaction and associated context to work around these complication. See the inline documentation for details. Introducing the wrapping transaction remove the need for dirstate-guard (one of the ultimate goal of all this), and slightly affect the result of a `hg rollback` after a `hg commit --addremove`. That last part is deemed fine. It aligns the behavior with what happens after a failed `hg commit --addremove` and nobody should be using `hg rollback` anyway. The small output change in the test come from the different transaction timing and fact the transaction now backup the dirstate before the addremove, which might mean "no file to backup" when the repository starts from an empty state.
hgext/largefiles/overrides.py
mercurial/cmdutil.py
mercurial/scmutil.py
tests/test-fncache.t
tests/test-inherit-mode.t
tests/test-racy-mutations.t
--- a/hgext/largefiles/overrides.py	Sun Feb 05 15:38:23 2023 +0100
+++ b/hgext/largefiles/overrides.py	Wed Feb 15 11:51:58 2023 +0100
@@ -1545,11 +1545,19 @@
 
 
 @eh.wrapfunction(scmutil, b'addremove')
-def scmutiladdremove(orig, repo, matcher, prefix, uipathfn, opts=None):
+def scmutiladdremove(
+    orig,
+    repo,
+    matcher,
+    prefix,
+    uipathfn,
+    opts=None,
+    open_tr=None,
+):
     if opts is None:
         opts = {}
     if not lfutil.islfilesrepo(repo):
-        return orig(repo, matcher, prefix, uipathfn, opts)
+        return orig(repo, matcher, prefix, uipathfn, opts, open_tr=open_tr)
     # Get the list of missing largefiles so we can remove them
     lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
     unsure, s, mtime_boundary = lfdirstate.status(
@@ -1560,6 +1568,10 @@
         unknown=False,
     )
 
+    # open the transaction and changing_files context
+    if open_tr is not None:
+        open_tr()
+
     # Call into the normal remove code, but the removing of the standin, we want
     # to have handled by original addremove.  Monkey patching here makes sure
     # we don't remove the standin in the largefiles code, preventing a very
@@ -1592,7 +1604,8 @@
     # function to take care of the rest.  Make sure it doesn't do anything with
     # largefiles by passing a matcher that will ignore them.
     matcher = composenormalfilematcher(matcher, repo[None].manifest(), added)
-    return orig(repo, matcher, prefix, uipathfn, opts)
+
+    return orig(repo, matcher, prefix, uipathfn, opts, open_tr=open_tr)
 
 
 # Calling purge with --all will cause the largefiles to be deleted.
--- a/mercurial/cmdutil.py	Sun Feb 05 15:38:23 2023 +0100
+++ b/mercurial/cmdutil.py	Wed Feb 15 11:51:58 2023 +0100
@@ -29,7 +29,6 @@
     changelog,
     copies,
     crecord as crecordmod,
-    dirstateguard,
     encoding,
     error,
     formatter,
@@ -2789,21 +2788,114 @@
     return err
 
 
+class _AddRemoveContext:
+    """a small (hacky) context to deal with lazy opening of context
+
+    This is to be used in the `commit` function right below. This deals with
+    lazily open a `changing_files` context inside a `transaction` that span the
+    full commit operation.
+
+    We need :
+    - a `changing_files` context to wrap the dirstate change within the
+      "addremove" operation,
+    - a transaction to make sure these change are not written right after the
+      addremove, but when the commit operation succeed.
+
+    However it get complicated because:
+    - opening a transaction "this early" shuffle hooks order, especially the
+      `precommit` one happening after the `pretxtopen` one which I am not too
+      enthusiastic about.
+    - the `mq` extensions + the `record` extension stacks many layers of call
+      to implement `qrefresh --interactive` and this result with `mq` calling a
+      `strip` in the middle of this function. Which prevent the existence of
+      transaction wrapping all of its function code. (however, `qrefresh` never
+      call the `addremove` bits.
+    - the largefile extensions (and maybe other extensions?) wraps `addremove`
+      so slicing `addremove` in smaller bits is a complex endeavour.
+
+    So I eventually took a this shortcut that open the transaction if we
+    actually needs it, not disturbing much of the rest of the code.
+
+    It will result in some hooks order change for `hg commit --addremove`,
+    however it seems a corner case enough to ignore that for now (hopefully).
+
+    Notes that None of the above problems seems insurmountable, however I have
+    been fighting with this specific piece of code for a couple of day already
+    and I need a solution to keep moving forward on the bigger work around
+    `changing_files` context that is being introduced at the same time as this
+    hack.
+
+    Each problem seems to have a solution:
+    - the hook order issue could be solved by refactoring the many-layer stack
+      that currently composes a commit and calling them earlier,
+    - the mq issue could be solved by refactoring `mq` so that the final strip
+      is done after transaction closure. Be warned that the mq code is quite
+      antic however.
+    - large-file could be reworked in parallel of the `addremove` to be
+      friendlier to this.
+
+    However each of these tasks are too much a diversion right now. In addition
+    they will be much easier to undertake when the `changing_files` dust has
+    settled."""
+
+    def __init__(self, repo):
+        self._repo = repo
+        self._transaction = None
+        self._dirstate_context = None
+        self._state = None
+
+    def __enter__(self):
+        assert self._state is None
+        self._state = True
+        return self
+
+    def open_transaction(self):
+        """open a `transaction` and `changing_files` context
+
+        Call this when you know that change to the dirstate will be needed and
+        we need to open the transaction early
+
+        This will also open the dirstate `changing_files` context, so you should
+        call `close_dirstate_context` when the distate changes are done.
+        """
+        assert self._state is not None
+        if self._transaction is None:
+            self._transaction = self._repo.transaction(b'commit')
+            self._transaction.__enter__()
+        if self._dirstate_context is None:
+            self._dirstate_context = self._repo.dirstate.changing_files(
+                self._repo
+            )
+            self._dirstate_context.__enter__()
+
+    def close_dirstate_context(self):
+        """close the change_files if any
+
+        Call this after the (potential) `open_transaction` call to close the
+        (potential) changing_files context.
+        """
+        if self._dirstate_context is not None:
+            self._dirstate_context.__exit__(None, None, None)
+            self._dirstate_context = None
+
+    def __exit__(self, *args):
+        if self._dirstate_context is not None:
+            self._dirstate_context.__exit__(*args)
+        if self._transaction is not None:
+            self._transaction.__exit__(*args)
+
+
 def commit(ui, repo, commitfunc, pats, opts):
     '''commit the specified files or all outstanding changes'''
     date = opts.get(b'date')
     if date:
         opts[b'date'] = dateutil.parsedate(date)
 
-    dsguard = None
-    # extract addremove carefully -- this function can be called from a command
-    # that doesn't support addremove
-    if opts.get(b'addremove'):
-        dsguard = dirstateguard.dirstateguard(repo, b'commit')
-    with dsguard or util.nullcontextmanager():
+    with repo.wlock(), repo.lock():
         message = logmessage(ui, opts)
         matcher = scmutil.match(repo[None], pats, opts)
-        if True:
+
+        with _AddRemoveContext(repo) as c:
             # extract addremove carefully -- this function can be called from a
             # command that doesn't support addremove
             if opts.get(b'addremove'):
@@ -2818,11 +2910,12 @@
                     b"",
                     uipathfn,
                     opts,
+                    open_tr=c.open_transaction,
                 )
                 m = _(b"failed to mark all new/missing files as added/removed")
                 if r != 0:
                     raise error.Abort(m)
-
+            c.close_dirstate_context()
             return commitfunc(ui, repo, message, matcher, opts)
 
 
--- a/mercurial/scmutil.py	Sun Feb 05 15:38:23 2023 +0100
+++ b/mercurial/scmutil.py	Wed Feb 15 11:51:58 2023 +0100
@@ -1219,7 +1219,7 @@
                 )
 
 
-def addremove(repo, matcher, prefix, uipathfn, opts=None):
+def addremove(repo, matcher, prefix, uipathfn, opts=None, open_tr=None):
     if opts is None:
         opts = {}
     m = matcher
@@ -1279,7 +1279,9 @@
         repo, m, added + unknown, removed + deleted, similarity, uipathfn
     )
 
-    if not dry_run:
+    if not dry_run and (unknown or forgotten or deleted or renames):
+        if open_tr is not None:
+            open_tr()
         _markchanges(repo, unknown + forgotten, deleted, renames)
 
     for f in rejected:
--- a/tests/test-fncache.t	Sun Feb 05 15:38:23 2023 +0100
+++ b/tests/test-fncache.t	Wed Feb 15 11:51:58 2023 +0100
@@ -103,12 +103,10 @@
   .hg/phaseroots
   .hg/requires
   .hg/undo
-  .hg/undo.backup.dirstate
   .hg/undo.backupfiles
   .hg/undo.bookmarks
   .hg/undo.branch
   .hg/undo.desc
-  .hg/undo.dirstate
   .hg/undo.phaseroots
   .hg/wcache
   .hg/wcache/checkisexec (execbit !)
@@ -147,11 +145,9 @@
   .hg/store/undo
   .hg/store/undo.backupfiles
   .hg/store/undo.phaseroots
-  .hg/undo.backup.dirstate
   .hg/undo.bookmarks
   .hg/undo.branch
   .hg/undo.desc
-  .hg/undo.dirstate
   .hg/wcache
   .hg/wcache/checkisexec (execbit !)
   .hg/wcache/checklink (symlink !)
--- a/tests/test-inherit-mode.t	Sun Feb 05 15:38:23 2023 +0100
+++ b/tests/test-inherit-mode.t	Wed Feb 15 11:51:58 2023 +0100
@@ -95,11 +95,9 @@
   00660 ./.hg/store/undo
   00660 ./.hg/store/undo.backupfiles
   00660 ./.hg/store/undo.phaseroots
-  00660 ./.hg/undo.backup.dirstate
   00660 ./.hg/undo.bookmarks
   00660 ./.hg/undo.branch
   00660 ./.hg/undo.desc
-  00660 ./.hg/undo.dirstate
   00770 ./.hg/wcache/
   00711 ./.hg/wcache/checkisexec
   007.. ./.hg/wcache/checklink (re)
--- a/tests/test-racy-mutations.t	Sun Feb 05 15:38:23 2023 +0100
+++ b/tests/test-racy-mutations.t	Wed Feb 15 11:51:58 2023 +0100
@@ -106,10 +106,10 @@
 
 #if fail-if-detected
   $ cat .foo_commit_out
+  note: commit message saved in .hg/last-message.txt
+  note: use 'hg commit --logfile .hg/last-message.txt --edit' to reuse it
   transaction abort!
   rollback completed
-  note: commit message saved in .hg/last-message.txt
-  note: use 'hg commit --logfile .hg/last-message.txt --edit' to reuse it
   abort: 00changelog.i: file cursor at position 249, expected 121
 And no corruption in the changelog.
   $ hg debugrevlogindex -c