sidedatacopies: only store an entry if it has values
author Pierre-Yves David <pierre-yves.david@octobus.net>
Wed, 02 Oct 2019 14:38:34 -0400
changeset 43231 30570a056fa8
parent 43230 e51f5d06a99c
child 43232 be178b5d91c8
sidedatacopies: only store an entry if it has values. This makes for more compact storage and helps us write faster code in the simple case. This change already provides a speed boost for copy tracing. For example, here is the combined time of running copy tracing on 6989 pairs of revisions in the pypy repository: before: 771s, after: 631s (-18%). This also has a very positive impact on changelog size. For example, here are the numbers for the `00changelog.d` file of pypy: before: 30449712 bytes, after: 24973718 bytes (-18%). This gives an overall quite acceptable overhead for storing copies in the changelog: filelog-only: 23370586 bytes, sidedata: 24973718 bytes (+7%). Differential Revision: https://phab.mercurial-scm.org/D7068
mercurial/changelog.py
mercurial/copies.py
tests/test-copies-in-changeset.t
--- a/mercurial/changelog.py	Wed Oct 02 14:16:30 2019 -0400
+++ b/mercurial/changelog.py	Wed Oct 02 14:38:34 2019 -0400
@@ -693,14 +693,16 @@
                 extra[b'filesremoved'] = filesremoved
         elif self._copiesstorage == b'changeset-sidedata':
             sidedata = {}
-            if p1copies is not None:
+            if p1copies:
                 sidedata[sidedatamod.SD_P1COPIES] = p1copies
-            if p2copies is not None:
+            if p2copies:
                 sidedata[sidedatamod.SD_P2COPIES] = p2copies
-            if filesadded is not None:
+            if filesadded:
                 sidedata[sidedatamod.SD_FILESADDED] = filesadded
-            if filesremoved is not None:
+            if filesremoved:
                 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
+            if not sidedata:
+                sidedata = None
 
         if extra:
             extra = encodeextra(extra)
--- a/mercurial/copies.py	Wed Oct 02 14:16:30 2019 -0400
+++ b/mercurial/copies.py	Wed Oct 02 14:38:34 2019 -0400
@@ -890,10 +890,14 @@
         p2copies = encodecopies(sortedfiles, p2copies)
         filesadded = encodefileindices(sortedfiles, filesadded)
         filesremoved = encodefileindices(sortedfiles, filesremoved)
-        sidedata[sidedatamod.SD_P1COPIES] = p1copies
-        sidedata[sidedatamod.SD_P2COPIES] = p2copies
-        sidedata[sidedatamod.SD_FILESADDED] = filesadded
-        sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
+        if p1copies:
+            sidedata[sidedatamod.SD_P1COPIES] = p1copies
+        if p2copies:
+            sidedata[sidedatamod.SD_P2COPIES] = p2copies
+        if filesadded:
+            sidedata[sidedatamod.SD_FILESADDED] = filesadded
+        if filesremoved:
+            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
     return sidedata
 
 
--- a/tests/test-copies-in-changeset.t	Wed Oct 02 14:16:30 2019 -0400
+++ b/tests/test-copies-in-changeset.t	Wed Oct 02 14:38:34 2019 -0400
@@ -77,15 +77,11 @@
   2\x00a (esc)
 #else
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  2 sidedata entries
    entry-0010 size 11
     '0\x00a\n1\x00a\n2\x00a'
-   entry-0011 size 0
-    ''
    entry-0012 size 5
     '0\n1\n2'
-   entry-0013 size 0
-    ''
 #endif
 
   $ hg showcopies
@@ -119,11 +115,9 @@
 
 #else
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  3 sidedata entries
    entry-0010 size 3
     '1\x00b'
-   entry-0011 size 0
-    ''
    entry-0012 size 1
     '1'
    entry-0013 size 1
@@ -168,15 +162,9 @@
 
 #else
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  1 sidedata entries
    entry-0010 size 4
     '0\x00b2'
-   entry-0011 size 0
-    ''
-   entry-0012 size 0
-    ''
-   entry-0013 size 0
-    ''
 #endif
 
   $ hg showcopies
@@ -231,15 +219,13 @@
 
 #else
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  3 sidedata entries
    entry-0010 size 7
     '0\x00a\n2\x00f'
    entry-0011 size 3
     '1\x00d'
    entry-0012 size 5
     '0\n1\n2'
-   entry-0013 size 0
-    ''
 #endif
 
   $ hg showcopies
@@ -262,15 +248,11 @@
 #else
   $ hg ci -m 'copy a to j'
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  2 sidedata entries
    entry-0010 size 3
     '0\x00a'
-   entry-0011 size 0
-    ''
    entry-0012 size 1
     '0'
-   entry-0013 size 0
-    ''
 #endif
   $ hg debugdata j 0
   \x01 (esc)
@@ -297,15 +279,11 @@
   $ hg ci --amend -m 'copy a to j, v2'
   saved backup bundle to $TESTTMP/repo/.hg/strip-backup/*-*-amend.hg (glob)
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  2 sidedata entries
    entry-0010 size 3
     '0\x00a'
-   entry-0011 size 0
-    ''
    entry-0012 size 1
     '0'
-   entry-0013 size 0
-    ''
 #endif
   $ hg showcopies --config experimental.copies.read-from=filelog-only
   a -> j
@@ -324,15 +302,6 @@
 #else
   $ hg ci -m 'modify j'
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
-   entry-0010 size 0
-    ''
-   entry-0011 size 0
-    ''
-   entry-0012 size 0
-    ''
-   entry-0013 size 0
-    ''
 #endif
 
 Test writing only to filelog
@@ -347,15 +316,11 @@
 #else
   $ hg ci -m 'copy a to k'
   $ hg debugsidedata -c -v -- -1
-  4 sidedata entries
+  2 sidedata entries
    entry-0010 size 3
     '0\x00a'
-   entry-0011 size 0
-    ''
    entry-0012 size 1
     '0'
-   entry-0013 size 0
-    ''
 #endif
 
   $ hg debugdata k 0
@@ -470,16 +435,10 @@
   compression:       zlib   zlib    zlib
   compression-level: default default default
   $ hg debugsidedata -c -- 0
-  4 sidedata entries
-   entry-0010 size 0
-   entry-0011 size 0
+  1 sidedata entries
    entry-0012 size 1
-   entry-0013 size 0
   $ hg debugsidedata -c -- 1
-  4 sidedata entries
-   entry-0010 size 0
-   entry-0011 size 0
-   entry-0012 size 0
+  1 sidedata entries
    entry-0013 size 1
   $ hg debugsidedata -m -- 0
   $ cat << EOF > .hg/hgrc
@@ -522,16 +481,10 @@
   compression:       zlib   zlib    zlib
   compression-level: default default default
   $ hg debugsidedata -c -- 0
-  4 sidedata entries
-   entry-0010 size 0
-   entry-0011 size 0
+  1 sidedata entries
    entry-0012 size 1
-   entry-0013 size 0
   $ hg debugsidedata -c -- 1
-  4 sidedata entries
-   entry-0010 size 0
-   entry-0011 size 0
-   entry-0012 size 0
+  1 sidedata entries
    entry-0013 size 1
   $ hg debugsidedata -m -- 0