merge with stable
author Augie Fackler <augie@google.com>
Tue, 28 May 2019 09:57:53 -0400
changeset 42380 12bd4e2d4d06
parent 42379 e2e507573c7c (current diff)
parent 42378 c3484ddbdb96 (diff)
child 42381 e3ee707d42ad
merge with stable
mercurial/manifest.py
--- a/mercurial/manifest.py	Sat May 25 19:49:44 2019 +0300
+++ b/mercurial/manifest.py	Tue May 28 09:57:53 2019 -0400
@@ -35,6 +35,9 @@
 parsers = policy.importmod(r'parsers')
 propertycache = util.propertycache
 
+# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
+FASTDELTA_TEXTDIFF_THRESHOLD = 1000
+
 def _parse(data):
     # This method does a little bit of excessive-looking
     # precondition checking. This is so that the behavior of this
@@ -123,17 +126,36 @@
     return (a > b) - (a < b)
 
 class _lazymanifest(object):
-    def __init__(self, data, positions=None, extrainfo=None, extradata=None):
+    """A pure python manifest backed by a byte string.  It is supplemented with
+    internal lists as it is modified, until it is compacted back to a pure byte
+    string.
+
+    ``data`` is the initial manifest data.
+
+    ``positions`` is a list of offsets, one per manifest entry.  Positive
+    values are offsets into ``data``, negative values are offsets into the
+    ``extradata`` list.  When an entry is removed, its entry is dropped from
+    ``positions``.  The values are encoded such that when walking the list and
+    indexing into ``data`` or ``extradata`` as appropriate, the entries are
+    sorted by filename.
+
+    ``extradata`` is a list of (key, hash, flags) for entries that were added or
+    modified since the manifest was created or compacted.
+    """
+    def __init__(self, data, positions=None, extrainfo=None, extradata=None,
+                 hasremovals=False):
         if positions is None:
             self.positions = self.findlines(data)
             self.extrainfo = [0] * len(self.positions)
             self.data = data
             self.extradata = []
+            self.hasremovals = False
         else:
             self.positions = positions[:]
             self.extrainfo = extrainfo[:]
             self.extradata = extradata[:]
             self.data = data
+            self.hasremovals = hasremovals
 
     def findlines(self, data):
         if not data:
@@ -240,7 +262,10 @@
         self.positions = self.positions[:needle] + self.positions[needle + 1:]
         self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
         if cur >= 0:
+            # This does NOT unsort the list as far as the search functions are
+            # concerned, as they only examine lines mapped by self.positions.
             self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
+            self.hasremovals = True
 
     def __setitem__(self, key, value):
         if not isinstance(key, bytes):
@@ -276,11 +301,11 @@
     def copy(self):
         # XXX call _compact like in C?
         return _lazymanifest(self.data, self.positions, self.extrainfo,
-            self.extradata)
+            self.extradata, self.hasremovals)
 
     def _compact(self):
         # hopefully not called TOO often
-        if len(self.extradata) == 0:
+        if len(self.extradata) == 0 and not self.hasremovals:
             return
         l = []
         i = 0
@@ -290,11 +315,25 @@
             if self.positions[i] >= 0:
                 cur = self.positions[i]
                 last_cut = cur
+
+                # Collect all contiguous entries in the buffer at the current
+                # offset, breaking out only for added/modified items held in
+                # extradata, or a deleted line prior to the next position.
                 while True:
                     self.positions[i] = offset
                     i += 1
                     if i == len(self.positions) or self.positions[i] < 0:
                         break
+
+                    # A removed file has no positions[] entry, but does have an
+                    # overwritten first byte.  Break out and find the end of the
+                    # current good entry/entries if there is a removed file
+                    # before the next position.
+                    if (self.hasremovals
+                        and self.data.find('\n\x00', cur,
+                                           self.positions[i]) != -1):
+                        break
+
                     offset += self.positions[i] - cur
                     cur = self.positions[i]
                 end_cut = self.data.find('\n', cur)
@@ -313,6 +352,7 @@
                     offset += len(l[-1])
                     i += 1
         self.data = ''.join(l)
+        self.hasremovals = False
         self.extradata = []
 
     def _pack(self, d):
@@ -558,7 +598,7 @@
         addbuf = util.buffer(base)
 
         changes = list(changes)
-        if len(changes) < 1000:
+        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
             # start with a readonly loop that finds the offset of
             # each line and creates the deltas
             for f, todelete in changes:
--- a/tests/test-manifest.t	Sat May 25 19:49:44 2019 +0300
+++ b/tests/test-manifest.t	Tue May 28 09:57:53 2019 -0400
@@ -201,3 +201,78 @@
   total cache data size 425 bytes, on-disk 425 bytes
   $ hg log -r '0' --debug | grep 'manifest:'
   manifest:    0:fce2a30dedad1eef4da95ca1dc0004157aa527cf
+
+Test file removal (especially with pure).  The tests are crafted so that
+contiguous spans of existing entries remain, to ensure those spans are handled
+properly.  (In this case: a.txt and aa.txt; and c.txt, cc.txt and ccc.txt.)
+
+  $ cat > $TESTTMP/manifest.py <<EOF
+  > from mercurial import (
+  >     extensions,
+  >     manifest,
+  > )
+  > def extsetup(ui):
+  >     manifest.FASTDELTA_TEXTDIFF_THRESHOLD = 0
+  > EOF
+  $ cat >> $HGRCPATH <<EOF
+  > [extensions]
+  > manifest = $TESTTMP/manifest.py
+  > EOF
+
+Pure removes should actually remove all dropped entries
+
+  $ hg init repo
+  $ cd repo
+  $ echo a > a.txt
+  $ echo aa > aa.txt
+  $ echo b > b.txt
+  $ echo c > c.txt
+  $ echo c > cc.txt
+  $ echo c > ccc.txt
+  $ echo b > d.txt
+  $ echo c > e.txt
+  $ hg ci -Aqm 'a-e'
+
+  $ hg rm b.txt d.txt
+  $ hg ci -m 'remove b and d'
+
+  $ hg debugdata -m 1
+  a.txt\x00b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (esc)
+  aa.txt\x00a4bdc161c8fbb523c9a60409603f8710ff49a571 (esc)
+  c.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+  cc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+  ccc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+  e.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+
+  $ hg up -qC .
+
+  $ hg verify
+  checking changesets
+  checking manifests
+  crosschecking files in changesets and manifests
+  checking files
+  checked 2 changesets with 8 changes to 8 files
+
+  $ hg rollback -q --config ui.rollback=True
+  $ hg rm b.txt d.txt
+  $ echo bb > bb.txt
+
+A mix of adds and removes should remove all dropped entries.
+
+  $ hg ci -Aqm 'remove b and d; add bb'
+
+  $ hg debugdata -m 1
+  a.txt\x00b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (esc)
+  aa.txt\x00a4bdc161c8fbb523c9a60409603f8710ff49a571 (esc)
+  bb.txt\x0004c6faf8a9fdd848a5304dfc1704749a374dff44 (esc)
+  c.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+  cc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+  ccc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+  e.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc)
+
+  $ hg verify
+  checking changesets
+  checking manifests
+  crosschecking files in changesets and manifests
+  checking files
+  checked 2 changesets with 9 changes to 9 files