mercurial/manifest.py
changeset 42380 12bd4e2d4d06
parent 42347 6310180662f5
parent 42378 c3484ddbdb96
child 42462 bc4373babd04
equal deleted inserted replaced
42379:e2e507573c7c 42380:12bd4e2d4d06
    33 )
    33 )
    34 
    34 
    35 parsers = policy.importmod(r'parsers')
    35 parsers = policy.importmod(r'parsers')
    36 propertycache = util.propertycache
    36 propertycache = util.propertycache
    37 
    37 
       
    38 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
       
    39 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
       
    40 
    38 def _parse(data):
    41 def _parse(data):
    39     # This method does a little bit of excessive-looking
    42     # This method does a little bit of excessive-looking
    40     # precondition checking. This is so that the behavior of this
    43     # precondition checking. This is so that the behavior of this
    41     # class exactly matches its C counterpart to try and help
    44     # class exactly matches its C counterpart to try and help
    42     # prevent surprise breakage for anyone that develops against
    45     # prevent surprise breakage for anyone that develops against
   121 
   124 
   122 def _cmp(a, b):
   125 def _cmp(a, b):
   123     return (a > b) - (a < b)
   126     return (a > b) - (a < b)
   124 
   127 
   125 class _lazymanifest(object):
   128 class _lazymanifest(object):
   126     def __init__(self, data, positions=None, extrainfo=None, extradata=None):
   129     """A pure python manifest backed by a byte string.  It is supplimented with
       
   130     internal lists as it is modified, until it is compacted back to a pure byte
       
   131     string.
       
   132 
       
   133     ``data`` is the initial manifest data.
       
   134 
       
   135     ``positions`` is a list of offsets, one per manifest entry.  Positive
       
   136     values are offsets into ``data``, negative values are offsets into the
       
   137     ``extradata`` list.  When an entry is removed, its entry is dropped from
       
   138     ``positions``.  The values are encoded such that when walking the list and
       
   139     indexing into ``data`` or ``extradata`` as appropriate, the entries are
       
   140     sorted by filename.
       
   141 
       
   142     ``extradata`` is a list of (key, hash, flags) for entries that were added or
       
   143     modified since the manifest was created or compacted.
       
   144     """
       
   145     def __init__(self, data, positions=None, extrainfo=None, extradata=None,
       
   146                  hasremovals=False):
   127         if positions is None:
   147         if positions is None:
   128             self.positions = self.findlines(data)
   148             self.positions = self.findlines(data)
   129             self.extrainfo = [0] * len(self.positions)
   149             self.extrainfo = [0] * len(self.positions)
   130             self.data = data
   150             self.data = data
   131             self.extradata = []
   151             self.extradata = []
       
   152             self.hasremovals = False
   132         else:
   153         else:
   133             self.positions = positions[:]
   154             self.positions = positions[:]
   134             self.extrainfo = extrainfo[:]
   155             self.extrainfo = extrainfo[:]
   135             self.extradata = extradata[:]
   156             self.extradata = extradata[:]
   136             self.data = data
   157             self.data = data
       
   158             self.hasremovals = hasremovals
   137 
   159 
   138     def findlines(self, data):
   160     def findlines(self, data):
   139         if not data:
   161         if not data:
   140             return []
   162             return []
   141         pos = data.find("\n")
   163         pos = data.find("\n")
   238             raise KeyError
   260             raise KeyError
   239         cur = self.positions[needle]
   261         cur = self.positions[needle]
   240         self.positions = self.positions[:needle] + self.positions[needle + 1:]
   262         self.positions = self.positions[:needle] + self.positions[needle + 1:]
   241         self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
   263         self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
   242         if cur >= 0:
   264         if cur >= 0:
       
   265             # This does NOT unsort the list as far as the search functions are
       
   266             # concerned, as they only examine lines mapped by self.positions.
   243             self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
   267             self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
       
   268             self.hasremovals = True
   244 
   269 
   245     def __setitem__(self, key, value):
   270     def __setitem__(self, key, value):
   246         if not isinstance(key, bytes):
   271         if not isinstance(key, bytes):
   247             raise TypeError("setitem: manifest keys must be a byte string.")
   272             raise TypeError("setitem: manifest keys must be a byte string.")
   248         if not isinstance(value, tuple) or len(value) != 2:
   273         if not isinstance(value, tuple) or len(value) != 2:
   274                               self.extrainfo[needle:])
   299                               self.extrainfo[needle:])
   275 
   300 
   276     def copy(self):
   301     def copy(self):
   277         # XXX call _compact like in C?
   302         # XXX call _compact like in C?
   278         return _lazymanifest(self.data, self.positions, self.extrainfo,
   303         return _lazymanifest(self.data, self.positions, self.extrainfo,
   279             self.extradata)
   304             self.extradata, self.hasremovals)
   280 
   305 
   281     def _compact(self):
   306     def _compact(self):
   282         # hopefully not called TOO often
   307         # hopefully not called TOO often
   283         if len(self.extradata) == 0:
   308         if len(self.extradata) == 0 and not self.hasremovals:
   284             return
   309             return
   285         l = []
   310         l = []
   286         i = 0
   311         i = 0
   287         offset = 0
   312         offset = 0
   288         self.extrainfo = [0] * len(self.positions)
   313         self.extrainfo = [0] * len(self.positions)
   289         while i < len(self.positions):
   314         while i < len(self.positions):
   290             if self.positions[i] >= 0:
   315             if self.positions[i] >= 0:
   291                 cur = self.positions[i]
   316                 cur = self.positions[i]
   292                 last_cut = cur
   317                 last_cut = cur
       
   318 
       
   319                 # Collect all contiguous entries in the buffer at the current
       
   320                 # offset, breaking out only for added/modified items held in
       
   321                 # extradata, or a deleted line prior to the next position.
   293                 while True:
   322                 while True:
   294                     self.positions[i] = offset
   323                     self.positions[i] = offset
   295                     i += 1
   324                     i += 1
   296                     if i == len(self.positions) or self.positions[i] < 0:
   325                     if i == len(self.positions) or self.positions[i] < 0:
   297                         break
   326                         break
       
   327 
       
   328                     # A removed file has no positions[] entry, but does have an
       
   329                     # overwritten first byte.  Break out and find the end of the
       
   330                     # current good entry/entries if there is a removed file
       
   331                     # before the next position.
       
   332                     if (self.hasremovals
       
   333                         and self.data.find('\n\x00', cur,
       
   334                                            self.positions[i]) != -1):
       
   335                         break
       
   336 
   298                     offset += self.positions[i] - cur
   337                     offset += self.positions[i] - cur
   299                     cur = self.positions[i]
   338                     cur = self.positions[i]
   300                 end_cut = self.data.find('\n', cur)
   339                 end_cut = self.data.find('\n', cur)
   301                 if end_cut != -1:
   340                 if end_cut != -1:
   302                     end_cut += 1
   341                     end_cut += 1
   311                     if len(t[1]) > 20:
   350                     if len(t[1]) > 20:
   312                         self.extrainfo[i] = ord(t[1][21])
   351                         self.extrainfo[i] = ord(t[1][21])
   313                     offset += len(l[-1])
   352                     offset += len(l[-1])
   314                     i += 1
   353                     i += 1
   315         self.data = ''.join(l)
   354         self.data = ''.join(l)
       
   355         self.hasremovals = False
   316         self.extradata = []
   356         self.extradata = []
   317 
   357 
   318     def _pack(self, d):
   358     def _pack(self, d):
   319         return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
   359         return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
   320 
   360 
   556         start = 0
   596         start = 0
   557         # zero copy representation of base as a buffer
   597         # zero copy representation of base as a buffer
   558         addbuf = util.buffer(base)
   598         addbuf = util.buffer(base)
   559 
   599 
   560         changes = list(changes)
   600         changes = list(changes)
   561         if len(changes) < 1000:
   601         if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
   562             # start with a readonly loop that finds the offset of
   602             # start with a readonly loop that finds the offset of
   563             # each line and creates the deltas
   603             # each line and creates the deltas
   564             for f, todelete in changes:
   604             for f, todelete in changes:
   565                 # bs will either be the index of the item or the insert point
   605                 # bs will either be the index of the item or the insert point
   566                 start, end = _msearch(addbuf, f, start)
   606                 start, end = _msearch(addbuf, f, start)