dirstate: add --minimal flag to debugrebuilddirstate
author Durham Goode <durham@fb.com>
Wed, 12 Aug 2015 19:44:21 -0700
changeset 26024 84c00f03e06c
parent 26023 48671378daeb
child 26030 5243890224ff
dirstate: add --minimal flag to debugrebuilddirstate

On repositories with hundreds of thousands of files, hg debugrebuilddirstate causes every dirstate entry to be marked lookup, and the next hg status can take many minutes.

This adds a --minimal flag that allows us to only rebuild the parts of the dirstate that are inconsistent. This follows two rules:

1) If a file is in the dirstate but not in the parent manifest, and it is not marked 'add', it is busted and we should drop it.

2) If a file is not in the dirstate at all, but it is in the parent manifest, it should be added to the dirstate and we need to mark it as lookup.

This allows us to fix repositories where the dirstate doesn't match the manifest much more quickly.

Tested by artificially adding bad dirstate entries (via code) for both cases above.
mercurial/commands.py
tests/test-completion.t
--- a/mercurial/commands.py	Thu Aug 13 22:10:52 2015 +0900
+++ b/mercurial/commands.py	Wed Aug 12 19:44:21 2015 -0700
@@ -2700,9 +2700,12 @@
               pa.distance(pb), rel))
 
 @command('debugrebuilddirstate|debugrebuildstate',
-    [('r', 'rev', '', _('revision to rebuild to'), _('REV'))],
+    [('r', 'rev', '', _('revision to rebuild to'), _('REV')),
+     ('', 'minimal', None, _('only rebuild files that are inconsistent with '
+                             'the working copy parent')),
+    ],
     _('[-r REV]'))
-def debugrebuilddirstate(ui, repo, rev):
+def debugrebuilddirstate(ui, repo, rev, **opts):
     """rebuild the dirstate as it would look like for the given revision
 
     If no revision is specified the first current parent will be used.
@@ -2711,13 +2714,33 @@
     The actual working directory content or existing dirstate
     information such as adds or removes is not considered.
 
+    ``minimal`` will only rebuild the dirstate status for files that claim to be
+    tracked but are not in the parent manifest, or that exist in the parent
+    manifest but are not in the dirstate. It will not change adds, removes, or
+    modified files that are in the working copy parent.
+
     One use of this command is to make the next :hg:`status` invocation
     check the actual file content.
     """
     ctx = scmutil.revsingle(repo, rev)
     wlock = repo.wlock()
     try:
-        repo.dirstate.rebuild(ctx.node(), ctx.manifest())
+        dirstate = repo.dirstate
+
+        # See command doc for what minimal does.
+        if opts.get('minimal'):
+            dirstatefiles = set(dirstate)
+            ctxfiles = set(ctx.manifest().keys())
+            for file in (dirstatefiles | ctxfiles):
+                indirstate = file in dirstatefiles
+                inctx = file in ctxfiles
+
+                if indirstate and not inctx and dirstate[file] != 'a':
+                    dirstate.drop(file)
+                elif inctx and not indirstate:
+                    dirstate.normallookup(file)
+        else:
+            dirstate.rebuild(ctx.node(), ctx.manifest())
     finally:
         wlock.release()
 
--- a/tests/test-completion.t	Thu Aug 13 22:10:52 2015 +0900
+++ b/tests/test-completion.t	Wed Aug 12 19:44:21 2015 -0700
@@ -254,7 +254,7 @@
   debugpathcomplete: full, normal, added, removed
   debugpushkey: 
   debugpvec: 
-  debugrebuilddirstate: rev
+  debugrebuilddirstate: rev, minimal
   debugrebuildfncache: 
   debugrename: rev
   debugrevlog: changelog, manifest, dir, dump