copies: add config option for writing copy metadata to file and/or changeset
authorMartin von Zweigbergk <martinvonz@google.com>
Wed, 27 Dec 2017 19:49:36 -0800
changeset 42141 0e41f40b01cc
parent 42140 a4483e380c3e
child 42142 5382d8f8530b
copies: add config option for writing copy metadata to file and/or changeset This introduces a config option that lets you choose to write copy metadata to the changeset extras instead of to filelog. There's also an option to write it to both places. I imagine that may possibly be useful when transitioning an existing repo. The copy metadata is stored as two fields in extras: one for copies since p1 and one for copies since p2. I may need to add more information later in order to make copy tracing faster. Specifically, I'm thinking about recording which files were added or removed so that copies._chaincopies() doesn't have to look at the manifest for that. But that would just be an optimization and that can be added once we know if it's necessary. I have also considered saving space by replacing the destination file path by an index into the "files" list, but that can also be changed later (but before the feature is ready to release). Differential Revision: https://phab.mercurial-scm.org/D6183
mercurial/changelog.py
mercurial/configitems.py
mercurial/localrepo.py
tests/test-annotate.t
tests/test-copies-in-changeset.t
tests/test-fastannotate-hg.t
--- a/mercurial/changelog.py	Thu Apr 04 13:46:49 2019 +0200
+++ b/mercurial/changelog.py	Wed Dec 27 19:49:36 2017 -0800
@@ -80,6 +80,13 @@
     ]
     return "\0".join(items)
 
+def encodecopies(copies):
+    items = [
+        '%s\0%s' % (k, copies[k])
+        for k in sorted(copies)
+    ]
+    return "\n".join(items)
+
 def stripdesc(desc):
     """strip trailing whitespace and leading and trailing empty lines"""
     return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
@@ -533,7 +540,7 @@
         return l[3:]
 
     def add(self, manifest, files, desc, transaction, p1, p2,
-                  user, date=None, extra=None):
+                  user, date=None, extra=None, p1copies=None, p2copies=None):
         # Convert to UTF-8 encoded bytestrings as the very first
         # thing: calling any method on a localstr object will turn it
         # into a str object and the cached UTF-8 string is thus lost.
@@ -562,6 +569,13 @@
             elif branch in (".", "null", "tip"):
                 raise error.StorageError(_('the name \'%s\' is reserved')
                                          % branch)
+        if (p1copies or p2copies) and extra is None:
+            extra = {}
+        if p1copies:
+            extra['p1copies'] = encodecopies(p1copies)
+        if p2copies:
+            extra['p2copies'] = encodecopies(p2copies)
+
         if extra:
             extra = encodeextra(extra)
             parseddate = "%s %s" % (parseddate, extra)
--- a/mercurial/configitems.py	Thu Apr 04 13:46:49 2019 +0200
+++ b/mercurial/configitems.py	Wed Dec 27 19:49:36 2017 -0800
@@ -488,6 +488,9 @@
 coreconfigitem('experimental', 'copies.read-from',
     default="filelog-only",
 )
+coreconfigitem('experimental', 'copies.write-to',
+    default='filelog-only',
+)
 coreconfigitem('experimental', 'crecordtest',
     default=None,
 )
--- a/mercurial/localrepo.py	Thu Apr 04 13:46:49 2019 +0200
+++ b/mercurial/localrepo.py	Wed Dec 27 19:49:36 2017 -0800
@@ -2324,7 +2324,8 @@
         """Returns the wlock if it's held, or None if it's not."""
         return self._currentlock(self._wlockref)
 
-    def _filecommit(self, fctx, manifest1, manifest2, linkrev, tr, changelist):
+    def _filecommit(self, fctx, manifest1, manifest2, linkrev, tr, changelist,
+                    includecopymeta):
         """
         commit an individual file as part of a larger transaction
         """
@@ -2383,8 +2384,9 @@
 
             if cnode:
                 self.ui.debug(" %s: copy %s:%s\n" % (fname, cfname, hex(cnode)))
-                meta["copy"] = cfname
-                meta["copyrev"] = hex(cnode)
+                if includecopymeta:
+                    meta["copy"] = cfname
+                    meta["copyrev"] = hex(cnode)
                 fparent1, fparent2 = nullid, newfparent
             else:
                 self.ui.warn(_("warning: can't find ancestor for '%s' "
@@ -2552,6 +2554,12 @@
         p1, p2 = ctx.p1(), ctx.p2()
         user = ctx.user()
 
+        writecopiesto = self.ui.config('experimental', 'copies.write-to')
+        writefilecopymeta = writecopiesto != 'changeset-only'
+        p1copies, p2copies = None, None
+        if writecopiesto in ('changeset-only', 'compatibility'):
+            p1copies = ctx.p1copies()
+            p2copies = ctx.p2copies()
         with self.lock(), self.transaction("commit") as tr:
             trp = weakref.proxy(tr)
 
@@ -2585,7 +2593,8 @@
                         else:
                             added.append(f)
                             m[f] = self._filecommit(fctx, m1, m2, linkrev,
-                                                    trp, changed)
+                                                    trp, changed,
+                                                    writefilecopymeta)
                             m.setflag(f, fctx.flags())
                     except OSError:
                         self.ui.warn(_("trouble committing %s!\n") %
@@ -2639,7 +2648,8 @@
             self.changelog.delayupdate(tr)
             n = self.changelog.add(mn, files, ctx.description(),
                                    trp, p1.node(), p2.node(),
-                                   user, ctx.date(), ctx.extra().copy())
+                                   user, ctx.date(), ctx.extra().copy(),
+                                   p1copies, p2copies)
             xp1, xp2 = p1.hex(), p2 and p2.hex() or ''
             self.hook('pretxncommit', throw=True, node=hex(n), parent1=xp1,
                       parent2=xp2)
--- a/tests/test-annotate.t	Thu Apr 04 13:46:49 2019 +0200
+++ b/tests/test-annotate.t	Wed Dec 27 19:49:36 2017 -0800
@@ -438,7 +438,7 @@
   > def reposetup(ui, repo):
   >     class legacyrepo(repo.__class__):
   >         def _filecommit(self, fctx, manifest1, manifest2,
-  >                         linkrev, tr, changelist):
+  >                         linkrev, tr, changelist, includecopymeta):
   >             fname = fctx.path()
   >             text = fctx.data()
   >             flog = self.file(fname)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-copies-in-changeset.t	Wed Dec 27 19:49:36 2017 -0800
@@ -0,0 +1,105 @@
+
+  $ cat >> $HGRCPATH << EOF
+  > [experimental]
+  > copies.write-to=changeset-only
+  > [alias]
+  > changesetcopies = log -r . -T 'files: {files}
+  >   {extras % "{ifcontains("copies", key, "{key}: {value}\n")}"}'
+  > EOF
+
+Check that copies are recorded correctly
+
+  $ hg init repo
+  $ cd repo
+  $ echo a > a
+  $ hg add a
+  $ hg ci -m initial
+  $ hg cp a b
+  $ hg cp a c
+  $ hg cp a d
+  $ hg ci -m 'copy a to b, c, and d'
+  $ hg changesetcopies
+  files: b c d
+  p1copies: b\x00a (esc)
+  c\x00a (esc)
+  d\x00a (esc)
+
+Check that renames are recorded correctly
+
+  $ hg mv b b2
+  $ hg ci -m 'rename b to b2'
+  $ hg changesetcopies
+  files: b b2
+  p1copies: b2\x00b (esc)
+
+Rename onto existing file. This should get recorded in the changeset files list and in the extras,
+even though there is no filelog entry.
+
+  $ hg cp b2 c --force
+  $ hg st --copies
+  M c
+    b2
+  $ hg debugindex c
+     rev linkrev nodeid       p1           p2
+       0       1 b789fdd96dc2 000000000000 000000000000
+  $ hg ci -m 'move b onto d'
+  $ hg changesetcopies
+  files: c
+  p1copies: c\x00b2 (esc)
+  $ hg debugindex c
+     rev linkrev nodeid       p1           p2
+       0       1 b789fdd96dc2 000000000000 000000000000
+
+Create a merge commit with copying done during merge.
+
+  $ hg co 0
+  0 files updated, 0 files merged, 3 files removed, 0 files unresolved
+  $ hg cp a e
+  $ hg cp a f
+  $ hg ci -m 'copy a to e and f'
+  created new head
+  $ hg merge 3
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+File 'a' exists on both sides, so 'g' could be recorded as being from p1 or p2, but we currently
+always record it as being from p1
+  $ hg cp a g
+File 'd' exists only in p2, so 'h' should be from p2
+  $ hg cp d h
+File 'f' exists only in p1, so 'i' should be from p1
+  $ hg cp f i
+  $ hg ci -m 'merge'
+  $ hg changesetcopies
+  files: g h i
+  p1copies: g\x00a (esc)
+  i\x00f (esc)
+  p2copies: h\x00d (esc)
+
+Test writing to both changeset and filelog
+
+  $ hg cp a j
+  $ hg ci -m 'copy a to j' --config experimental.copies.write-to=compatibility
+  $ hg changesetcopies
+  files: j
+  p1copies: j\x00a (esc)
+  $ hg debugdata j 0
+  \x01 (esc)
+  copy: a
+  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
+  \x01 (esc)
+  a
+
+Test writing only to filelog
+
+  $ hg cp a k
+  $ hg ci -m 'copy a to k' --config experimental.copies.write-to=filelog-only
+  $ hg changesetcopies
+  files: k
+  $ hg debugdata k 0
+  \x01 (esc)
+  copy: a
+  copyrev: b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3
+  \x01 (esc)
+  a
+
+  $ cd ..
--- a/tests/test-fastannotate-hg.t	Thu Apr 04 13:46:49 2019 +0200
+++ b/tests/test-fastannotate-hg.t	Wed Dec 27 19:49:36 2017 -0800
@@ -443,7 +443,7 @@
   > def reposetup(ui, repo):
   >     class legacyrepo(repo.__class__):
   >         def _filecommit(self, fctx, manifest1, manifest2,
-  >                         linkrev, tr, changelist):
+  >                         linkrev, tr, changelist, includecopymeta):
   >             fname = fctx.path()
   >             text = fctx.data()
   >             flog = self.file(fname)