cmdutil: rewrite walkchangerevs() by using logcmdutil functions
author Yuya Nishihara <yuya@tcha.org>
Thu, 10 Sep 2020 18:01:43 +0900
changeset 45650 0356b41fe01d
parent 45649 bba730d7a6f4
child 45651 c7413ffe0402
cmdutil: rewrite walkchangerevs() by using logcmdutil functions

cmdutil.walkchangerevs() now takes (revs, makefilematcher) in place of (match, opts), and only provides the "windowing" functionality. Unused classes and functions will be removed by the next patch.

"hg grep --follow" (--all-files) is still broken since there is no logic to follow copies while traversing the changelog, but at least it does follow the DAG.
hgext/churn.py
mercurial/cmdutil.py
mercurial/commands.py
tests/test-grep.t
--- a/hgext/churn.py	Thu Sep 10 17:14:03 2020 +0900
+++ b/hgext/churn.py	Thu Sep 10 18:01:43 2020 +0900
@@ -23,7 +23,6 @@
     patch,
     pycompat,
     registrar,
-    scmutil,
 )
 from mercurial.utils import dateutil
 
@@ -76,8 +75,6 @@
     if opts.get(b'date'):
         df = dateutil.matchdate(opts[b'date'])
 
-    m = scmutil.match(repo[None], pats, opts)
-
     def prep(ctx, fmatch):
         rev = ctx.rev()
         if df and not df(ctx.date()[0]):  # doesn't match date format
@@ -99,7 +96,15 @@
 
         progress.increment()
 
-    for ctx in cmdutil.walkchangerevs(repo, m, opts, prep):
+    wopts = logcmdutil.walkopts(
+        pats=pats,
+        opts=opts,
+        revspec=opts[b'rev'],
+        include_pats=opts[b'include'],
+        exclude_pats=opts[b'exclude'],
+    )
+    revs, makefilematcher = logcmdutil.makewalker(repo, wopts)
+    for ctx in cmdutil.walkchangerevs(repo, revs, makefilematcher, prep):
         continue
 
     progress.complete()
--- a/mercurial/cmdutil.py	Thu Sep 10 17:14:03 2020 +0900
+++ b/mercurial/cmdutil.py	Thu Sep 10 18:01:43 2020 +0900
@@ -2428,8 +2428,8 @@
         return False
 
 
-def walkchangerevs(repo, match, opts, prepare):
-    '''Iterate over files and the revs in which they changed.
+def walkchangerevs(repo, revs, makefilematcher, prepare):
+    '''Iterate over files and the revs in a "windowed" way.
 
     Callers most commonly need to iterate backwards over the history
     in which they are interested. Doing so has awful (quadratic-looking)
@@ -2443,107 +2443,11 @@
     yielding each context, the iterator will first call the prepare
     function on each context in the window in forward order.'''
 
-    allfiles = opts.get(b'all_files')
-    follow = opts.get(b'follow') or opts.get(b'follow_first')
-    revs = _walkrevs(repo, opts)
     if not revs:
         return []
-    wanted = set()
-    slowpath = match.anypats() or (not match.always() and opts.get(b'removed'))
-    fncache = {}
     change = repo.__getitem__
 
-    # First step is to fill wanted, the set of revisions that we want to yield.
-    # When it does not induce extra cost, we also fill fncache for revisions in
-    # wanted: a cache of filenames that were changed (ctx.files()) and that
-    # match the file filtering conditions.
-
-    if match.always() or allfiles:
-        # No files, no patterns.  Display all revs.
-        wanted = revs
-    elif not slowpath:
-        # We only have to read through the filelog to find wanted revisions
-
-        try:
-            wanted = walkfilerevs(repo, match, follow, revs, fncache)
-        except FileWalkError:
-            slowpath = True
-
-            # We decided to fall back to the slowpath because at least one
-            # of the paths was not a file. Check to see if at least one of them
-            # existed in history, otherwise simply return
-            for path in match.files():
-                if path == b'.' or path in repo.store:
-                    break
-            else:
-                return []
-
-    if slowpath:
-        # We have to read the changelog to match filenames against
-        # changed files
-
-        if follow:
-            raise error.Abort(
-                _(b'can only follow copies/renames for explicit filenames')
-            )
-
-        # The slow path checks files modified in every changeset.
-        # This is really slow on large repos, so compute the set lazily.
-        class lazywantedset(object):
-            def __init__(self):
-                self.set = set()
-                self.revs = set(revs)
-
-            # No need to worry about locality here because it will be accessed
-            # in the same order as the increasing window below.
-            def __contains__(self, value):
-                if value in self.set:
-                    return True
-                elif not value in self.revs:
-                    return False
-                else:
-                    self.revs.discard(value)
-                    ctx = change(value)
-                    if allfiles:
-                        matches = list(ctx.manifest().walk(match))
-                    else:
-                        matches = [f for f in ctx.files() if match(f)]
-                    if matches:
-                        fncache[value] = matches
-                        self.set.add(value)
-                        return True
-                    return False
-
-            def discard(self, value):
-                self.revs.discard(value)
-                self.set.discard(value)
-
-        wanted = lazywantedset()
-
-    # it might be worthwhile to do this in the iterator if the rev range
-    # is descending and the prune args are all within that range
-    for rev in opts.get(b'prune', ()):
-        rev = repo[rev].rev()
-        ff = _followfilter(repo)
-        stop = min(revs[0], revs[-1])
-        for x in pycompat.xrange(rev, stop - 1, -1):
-            if ff.match(x):
-                wanted = wanted - [x]
-
-    # Now that wanted is correctly initialized, we can iterate over the
-    # revision range, yielding only revisions in wanted.
     def iterate():
-        if follow and match.always():
-            ff = _followfilter(repo, onlyfirst=opts.get(b'follow_first'))
-
-            def want(rev):
-                return ff.match(rev) and rev in wanted
-
-        else:
-
-            def want(rev):
-                return rev in wanted
-
         it = iter(revs)
         stopiteration = False
         for windowsize in increasingwindows():
@@ -2553,28 +2457,10 @@
                 if rev is None:
                     stopiteration = True
                     break
-                elif want(rev):
-                    nrevs.append(rev)
+                nrevs.append(rev)
             for rev in sorted(nrevs):
-                fns = fncache.get(rev)
                 ctx = change(rev)
-                if not fns:
-
-                    def fns_generator():
-                        if allfiles:
-
-                            def bad(f, msg):
-                                pass
-
-                            for f in ctx.matches(matchmod.badmatch(match, bad)):
-                                yield f
-                        else:
-                            for f in ctx.files():
-                                if match(f):
-                                    yield f
-
-                    fns = fns_generator()
-                prepare(ctx, scmutil.matchfiles(repo, fns))
+                prepare(ctx, makefilematcher(ctx))
             for rev in nrevs:
                 yield change(rev)
 
--- a/mercurial/commands.py	Thu Sep 10 17:14:03 2020 +0900
+++ b/mercurial/commands.py	Thu Sep 10 18:01:43 2020 +0900
@@ -3579,7 +3579,6 @@
 
     skip = set()
     revfiles = {}
-    match = scmutil.match(repo[None], pats, opts)
     found = False
     follow = opts.get(b'follow')
 
@@ -3654,9 +3653,21 @@
                     if pfn not in matches[parent] and pfn in pctx:
                         grepbody(pfn, parent, readfile(pctx, pfn))
 
+    wopts = logcmdutil.walkopts(
+        pats=pats,
+        opts=opts,
+        revspec=opts[b'rev'],
+        include_pats=opts[b'include'],
+        exclude_pats=opts[b'exclude'],
+        follow=follow,
+        force_changelog_traversal=all_files,
+        filter_revisions_by_pats=not all_files,
+    )
+    revs, makefilematcher = logcmdutil.makewalker(repo, wopts)
+
     ui.pager(b'grep')
     fm = ui.formatter(b'grep', opts)
-    for ctx in cmdutil.walkchangerevs(repo, match, opts, prep):
+    for ctx in cmdutil.walkchangerevs(repo, revs, makefilematcher, prep):
         rev = ctx.rev()
         parent = ctx.p1().rev()
         for fn in sorted(revfiles.get(rev, [])):
--- a/tests/test-grep.t	Thu Sep 10 17:14:03 2020 +0900
+++ b/tests/test-grep.t	Thu Sep 10 18:01:43 2020 +0900
@@ -990,7 +990,6 @@
   1: A add0-cp1, A add0-cp1-mod1, A add0-cp1-mod1-rm3, M add0-mod1, R add0-rm1
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
  BROKEN: should not abort because of removed file
   $ hg grep --diff -fr'wdir()' data
   add0-cp4-mod4:2147483647:+:data4
@@ -1063,10 +1062,12 @@
   1: A add0-cp1, A add0-cp1-mod1, A add0-cp1-mod1-rm3, M add0-mod1, R add0-rm1
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should include the revision 1
   $ hg grep --diff -fr'1+2' data
   add0-cp2-mod2:2:+:data2
   add0-mod2:2:+:data2
+  add0-cp1-mod1:1:+:data1
+  add0-cp1-mod1-rm3:1:+:data1
+  add0-mod1:1:+:data1
   add0:0:+:data0
   add0-mod1:0:+:data0
   add0-mod2:0:+:data0
@@ -1076,7 +1077,6 @@
   add0-rm2:0:+:data0
   add0-rm4:0:+:data0
 
- BROKEN: should include the revision 1
   $ hg grep -fr'1+2' data
   add0:2:data0
   add0-cp2:2:data0
@@ -1089,6 +1089,19 @@
   add0-mod4:2:data0
   add0-rm1:2:data0
   add0-rm4:2:data0
+  add0:1:data0
+  add0-cp1:1:data0
+  add0-cp1-mod1:1:data0
+  add0-cp1-mod1:1:data1
+  add0-cp1-mod1-rm3:1:data0
+  add0-cp1-mod1-rm3:1:data1
+  add0-mod1:1:data0
+  add0-mod1:1:data1
+  add0-mod2:1:data0
+  add0-mod3:1:data0
+  add0-mod4:1:data0
+  add0-rm2:1:data0
+  add0-rm4:1:data0
   add0:0:data0
   add0-mod1:0:data0
   add0-mod2:0:data0
@@ -1108,11 +1121,9 @@
   add0-mod3:3:+:data3
   add0-mod3:0:+:data0
 
- BROKEN: should not include the revision 2
   $ hg grep -f data add0-mod3
   add0-mod3:3:data0
   add0-mod3:3:data3
-  add0-mod3:2:data0
   add0-mod3:1:data0
   add0-mod3:0:data0
 
@@ -1124,10 +1135,8 @@
   $ hg grep --diff -f data add0-mod4
   add0-mod4:0:+:data0
 
- BROKEN: should not include the revision 2
   $ hg grep -f data add0-mod4
   add0-mod4:3:data0
-  add0-mod4:2:data0
   add0-mod4:1:data0
   add0-mod4:0:data0
 
@@ -1170,7 +1179,7 @@
   [255]
 
   $ hg grep --diff -f data add0-cp4
-  abort: cannot follow file not in parent revision: "add0-cp4"
+  abort: cannot follow nonexistent file: "add0-cp4"
   [255]
 
  BROKEN: maybe better to abort
@@ -1199,7 +1208,7 @@
   [255]
 
   $ hg grep --diff -fr. data add0-cp1-mod1-rm3
-  abort: cannot follow file not in parent revision: "add0-cp1-mod1-rm3"
+  abort: cannot follow file not in any of the specified revisions: "add0-cp1-mod1-rm3"
   [255]
 
  BROKEN: should abort
@@ -1213,14 +1222,13 @@
   abort: cannot follow file not in parent revision: "add0-rm4"
   [255]
 
- BROKEN: may be okay, but different behavior from "hg log"
   $ hg grep --diff -f data add0-rm4
-  add0-rm4:0:+:data0
+  abort: cannot follow file not in parent revision: "add0-rm4"
+  [255]
 
- BROKEN: should not include the revision 2, and maybe better to abort
+ BROKEN: should abort
   $ hg grep -f data add0-rm4
   add0-rm4:3:data0
-  add0-rm4:2:data0
   add0-rm4:1:data0
   add0-rm4:0:data0
 
@@ -1250,14 +1258,12 @@
   add0:0:+:data0
   add0-mod3:0:+:data0
 
- BROKEN: should not include the revision 2
  BROKEN: should follow history across renames
   $ hg grep -f data add0-mod3 add0-cp1-mod1
   add0-cp1-mod1:3:data0
   add0-cp1-mod1:3:data1
   add0-mod3:3:data0
   add0-mod3:3:data3
-  add0-mod3:2:data0
   add0-cp1-mod1:1:data0
   add0-cp1-mod1:1:data1
   add0-mod3:1:data0
@@ -1269,8 +1275,8 @@
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should include the revision 2
   $ hg grep --diff -fr2 data add0-mod2
+  add0-mod2:2:+:data2
   add0-mod2:0:+:data0
 
   $ hg grep -fr2 data add0-mod2
@@ -1284,10 +1290,8 @@
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history from the specified revision
   $ hg grep --diff -fr2 data add0-cp2
-  abort: cannot follow file not in parent revision: "add0-cp2"
-  [255]
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr2 data add0-cp2
@@ -1299,10 +1303,9 @@
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history from the specified revision
   $ hg grep --diff -fr2 data add0-cp2-mod2
-  abort: cannot follow file not in parent revision: "add0-cp2-mod2"
-  [255]
+  add0-cp2-mod2:2:+:data2
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr2 data add0-cp2-mod2
@@ -1315,9 +1318,9 @@
   abort: cannot follow file not in any of the specified revisions: "add0-rm2"
   [255]
 
- BROKEN: should abort
   $ hg grep --diff -fr2 data add0-rm2
-  add0-rm2:0:+:data0
+  abort: cannot follow file not in any of the specified revisions: "add0-rm2"
+  [255]
 
  BROKEN: should abort
   $ hg grep -fr2 data add0-rm2
@@ -1329,10 +1332,10 @@
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history from the specified revision
   $ hg grep --diff -fr2 data add0-cp2 add0-mod2
-  abort: cannot follow file not in parent revision: "add0-cp2"
-  [255]
+  add0-mod2:2:+:data2
+  add0:0:+:data0
+  add0-mod2:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr2 data add0-cp2 add0-mod2
@@ -1366,8 +1369,8 @@
   2147483647: A add0-cp4, A add0-cp4-mod4, M add0-mod4, R add0-rm4
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should include the changes in wdir
   $ hg grep --diff -fr'wdir()' data add0-mod4
+  add0-mod4:2147483647:+:data4
   add0-mod4:0:+:data0
 
   $ hg grep -fr'wdir()' data add0-mod4
@@ -1383,10 +1386,8 @@
   2147483647: A add0-cp4, A add0-cp4-mod4, M add0-mod4, R add0-rm4
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
   $ hg grep --diff -fr'wdir()' data add0-cp4
-  abort: cannot follow file not in parent revision: "add0-cp4"
-  [255]
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr'wdir()' data add0-cp4
@@ -1398,10 +1399,9 @@
   2147483647: A add0-cp4, A add0-cp4-mod4, M add0-mod4, R add0-rm4
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
   $ hg grep --diff -fr'wdir()' data add0-cp4-mod4
-  abort: cannot follow file not in parent revision: "add0-cp4-mod4"
-  [255]
+  add0-cp4-mod4:2147483647:+:data4
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr'wdir()' data add0-cp4-mod4
@@ -1415,10 +1415,12 @@
   3: A add0-cp1-cp3, A add0-cp1-mod1-cp3-mod3, R add0-cp1-mod1-rm3, M add0-mod3
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
   $ hg grep --diff -fr'wdir()' data add0-cp4 add0-mod4 add0-mod3
-  abort: cannot follow file not in parent revision: "add0-cp4"
-  [255]
+  add0-mod4:2147483647:+:data4
+  add0-mod3:3:+:data3
+  add0:0:+:data0
+  add0-mod3:0:+:data0
+  add0-mod4:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr'wdir()' data add0-cp4 add0-mod4 add0-mod3