hgext/remotefilelog/historypack.py
changeset 43076 2372284d9457
parent 41365 876494fd967d
child 43077 687b865b95ad
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
    35 ANC_P1NODE = 1
    35 ANC_P1NODE = 1
    36 ANC_P2NODE = 2
    36 ANC_P2NODE = 2
    37 ANC_LINKNODE = 3
    37 ANC_LINKNODE = 3
    38 ANC_COPYFROM = 4
    38 ANC_COPYFROM = 4
    39 
    39 
       
    40 
    40 class historypackstore(basepack.basepackstore):
    41 class historypackstore(basepack.basepackstore):
    41     INDEXSUFFIX = INDEXSUFFIX
    42     INDEXSUFFIX = INDEXSUFFIX
    42     PACKSUFFIX = PACKSUFFIX
    43     PACKSUFFIX = PACKSUFFIX
    43 
    44 
    44     def getpack(self, path):
    45     def getpack(self, path):
    73                 pass
    74                 pass
    74 
    75 
    75         raise KeyError((name, node))
    76         raise KeyError((name, node))
    76 
    77 
    77     def add(self, filename, node, p1, p2, linknode, copyfrom):
    78     def add(self, filename, node, p1, p2, linknode, copyfrom):
    78         raise RuntimeError("cannot add to historypackstore (%s:%s)"
    79         raise RuntimeError(
    79                            % (filename, hex(node)))
    80             "cannot add to historypackstore (%s:%s)" % (filename, hex(node))
       
    81         )
       
    82 
    80 
    83 
    81 class historypack(basepack.basepack):
    84 class historypack(basepack.basepack):
    82     INDEXSUFFIX = INDEXSUFFIX
    85     INDEXSUFFIX = INDEXSUFFIX
    83     PACKSUFFIX = PACKSUFFIX
    86     PACKSUFFIX = PACKSUFFIX
    84 
    87 
   151 
   154 
   152                 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
   155                 yield (ancnode, p1node, p2node, entry[ANC_LINKNODE], copyfrom)
   153 
   156 
   154     def _readentry(self, offset):
   157     def _readentry(self, offset):
   155         data = self._data
   158         data = self._data
   156         entry = struct.unpack(PACKFORMAT, data[offset:offset + PACKENTRYLENGTH])
   159         entry = struct.unpack(
       
   160             PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
       
   161         )
   157         copyfrom = None
   162         copyfrom = None
   158         copyfromlen = entry[ANC_COPYFROM]
   163         copyfromlen = entry[ANC_COPYFROM]
   159         if copyfromlen != 0:
   164         if copyfromlen != 0:
   160             offset += PACKENTRYLENGTH
   165             offset += PACKENTRYLENGTH
   161             copyfrom = data[offset:offset + copyfromlen]
   166             copyfrom = data[offset : offset + copyfromlen]
   162         return entry, copyfrom
   167         return entry, copyfrom
   163 
   168 
   164     def add(self, filename, node, p1, p2, linknode, copyfrom):
   169     def add(self, filename, node, p1, p2, linknode, copyfrom):
   165         raise RuntimeError("cannot add to historypack (%s:%s)" %
   170         raise RuntimeError(
   166                            (filename, hex(node)))
   171             "cannot add to historypack (%s:%s)" % (filename, hex(node))
       
   172         )
   167 
   173 
   168     def _findnode(self, name, node):
   174     def _findnode(self, name, node):
   169         if self.VERSION == 0:
   175         if self.VERSION == 0:
   170             ancestors = self._getancestors(name, node)
   176             ancestors = self._getancestors(name, node)
   171             for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
   177             for ancnode, p1node, p2node, linknode, copyfrom in ancestors:
   172                 if ancnode == node:
   178                 if ancnode == node:
   173                     return (ancnode, p1node, p2node, linknode, copyfrom)
   179                     return (ancnode, p1node, p2node, linknode, copyfrom)
   174         else:
   180         else:
   175             section = self._findsection(name)
   181             section = self._findsection(name)
   176             nodeindexoffset, nodeindexsize = section[3:]
   182             nodeindexoffset, nodeindexsize = section[3:]
   177             entry = self._bisect(node, nodeindexoffset,
   183             entry = self._bisect(
   178                                  nodeindexoffset + nodeindexsize,
   184                 node,
   179                                  NODEINDEXENTRYLENGTH)
   185                 nodeindexoffset,
       
   186                 nodeindexoffset + nodeindexsize,
       
   187                 NODEINDEXENTRYLENGTH,
       
   188             )
   180             if entry is not None:
   189             if entry is not None:
   181                 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
   190                 node, offset = struct.unpack(NODEINDEXFORMAT, entry)
   182                 entry, copyfrom = self._readentry(offset)
   191                 entry, copyfrom = self._readentry(offset)
   183                 # Drop the copyfromlen from the end of entry, and replace it
   192                 # Drop the copyfromlen from the end of entry, and replace it
   184                 # with the copyfrom string.
   193                 # with the copyfrom string.
   187         raise KeyError("unable to find history for %s:%s" % (name, hex(node)))
   196         raise KeyError("unable to find history for %s:%s" % (name, hex(node)))
   188 
   197 
   189     def _findsection(self, name):
   198     def _findsection(self, name):
   190         params = self.params
   199         params = self.params
   191         namehash = hashlib.sha1(name).digest()
   200         namehash = hashlib.sha1(name).digest()
   192         fanoutkey = struct.unpack(params.fanoutstruct,
   201         fanoutkey = struct.unpack(
   193                                   namehash[:params.fanoutprefix])[0]
   202             params.fanoutstruct, namehash[: params.fanoutprefix]
       
   203         )[0]
   194         fanout = self._fanouttable
   204         fanout = self._fanouttable
   195 
   205 
   196         start = fanout[fanoutkey] + params.indexstart
   206         start = fanout[fanoutkey] + params.indexstart
   197         indexend = self._indexend
   207         indexend = self._indexend
   198 
   208 
   207         if not entry:
   217         if not entry:
   208             raise KeyError(name)
   218             raise KeyError(name)
   209 
   219 
   210         rawentry = struct.unpack(self.INDEXFORMAT, entry)
   220         rawentry = struct.unpack(self.INDEXFORMAT, entry)
   211         x, offset, size, nodeindexoffset, nodeindexsize = rawentry
   221         x, offset, size, nodeindexoffset, nodeindexsize = rawentry
   212         rawnamelen = self._index[nodeindexoffset:nodeindexoffset +
   222         rawnamelen = self._index[
   213                                                  constants.FILENAMESIZE]
   223             nodeindexoffset : nodeindexoffset + constants.FILENAMESIZE
       
   224         ]
   214         actualnamelen = struct.unpack('!H', rawnamelen)[0]
   225         actualnamelen = struct.unpack('!H', rawnamelen)[0]
   215         nodeindexoffset += constants.FILENAMESIZE
   226         nodeindexoffset += constants.FILENAMESIZE
   216         actualname = self._index[nodeindexoffset:nodeindexoffset +
   227         actualname = self._index[
   217                                                  actualnamelen]
   228             nodeindexoffset : nodeindexoffset + actualnamelen
       
   229         ]
   218         if actualname != name:
   230         if actualname != name:
   219             raise KeyError("found file name %s when looking for %s" %
   231             raise KeyError(
   220                            (actualname, name))
   232                 "found file name %s when looking for %s" % (actualname, name)
       
   233             )
   221         nodeindexoffset += actualnamelen
   234         nodeindexoffset += actualnamelen
   222 
   235 
   223         filenamelength = struct.unpack('!H', self._data[offset:offset +
   236         filenamelength = struct.unpack(
   224                                                     constants.FILENAMESIZE])[0]
   237             '!H', self._data[offset : offset + constants.FILENAMESIZE]
       
   238         )[0]
   225         offset += constants.FILENAMESIZE
   239         offset += constants.FILENAMESIZE
   226 
   240 
   227         actualname = self._data[offset:offset + filenamelength]
   241         actualname = self._data[offset : offset + filenamelength]
   228         offset += filenamelength
   242         offset += filenamelength
   229 
   243 
   230         if name != actualname:
   244         if name != actualname:
   231             raise KeyError("found file name %s when looking for %s" %
   245             raise KeyError(
   232                            (actualname, name))
   246                 "found file name %s when looking for %s" % (actualname, name)
       
   247             )
   233 
   248 
   234         # Skip entry list size
   249         # Skip entry list size
   235         offset += ENTRYCOUNTSIZE
   250         offset += ENTRYCOUNTSIZE
   236 
   251 
   237         nodelistoffset = offset
   252         nodelistoffset = offset
   238         nodelistsize = (size - constants.FILENAMESIZE - filenamelength -
   253         nodelistsize = (
   239                         ENTRYCOUNTSIZE)
   254             size - constants.FILENAMESIZE - filenamelength - ENTRYCOUNTSIZE
   240         return (name, nodelistoffset, nodelistsize,
   255         )
   241                 nodeindexoffset, nodeindexsize)
   256         return (
       
   257             name,
       
   258             nodelistoffset,
       
   259             nodelistsize,
       
   260             nodeindexoffset,
       
   261             nodeindexsize,
       
   262         )
   242 
   263 
   243     def _bisect(self, node, start, end, entrylen):
   264     def _bisect(self, node, start, end, entrylen):
   244         # Bisect between start and end to find node
   265         # Bisect between start and end to find node
   245         origstart = start
   266         origstart = start
   246         startnode = self._index[start:start + NODELENGTH]
   267         startnode = self._index[start : start + NODELENGTH]
   247         endnode = self._index[end:end + NODELENGTH]
   268         endnode = self._index[end : end + NODELENGTH]
   248 
   269 
   249         if startnode == node:
   270         if startnode == node:
   250             return self._index[start:start + entrylen]
   271             return self._index[start : start + entrylen]
   251         elif endnode == node:
   272         elif endnode == node:
   252             return self._index[end:end + entrylen]
   273             return self._index[end : end + entrylen]
   253         else:
   274         else:
   254             while start < end - entrylen:
   275             while start < end - entrylen:
   255                 mid = start + (end - start) // 2
   276                 mid = start + (end - start) // 2
   256                 mid = mid - ((mid - origstart) % entrylen)
   277                 mid = mid - ((mid - origstart) % entrylen)
   257                 midnode = self._index[mid:mid + NODELENGTH]
   278                 midnode = self._index[mid : mid + NODELENGTH]
   258                 if midnode == node:
   279                 if midnode == node:
   259                     return self._index[mid:mid + entrylen]
   280                     return self._index[mid : mid + entrylen]
   260                 if node > midnode:
   281                 if node > midnode:
   261                     start = mid
   282                     start = mid
   262                 elif node < midnode:
   283                 elif node < midnode:
   263                     end = mid
   284                     end = mid
   264         return None
   285         return None
   268             ledger.markhistoryentry(self, filename, node)
   289             ledger.markhistoryentry(self, filename, node)
   269 
   290 
   270     def cleanup(self, ledger):
   291     def cleanup(self, ledger):
   271         entries = ledger.sources.get(self, [])
   292         entries = ledger.sources.get(self, [])
   272         allkeys = set(self)
   293         allkeys = set(self)
   273         repackedkeys = set((e.filename, e.node) for e in entries if
   294         repackedkeys = set(
   274                            e.historyrepacked)
   295             (e.filename, e.node) for e in entries if e.historyrepacked
       
   296         )
   275 
   297 
   276         if len(allkeys - repackedkeys) == 0:
   298         if len(allkeys - repackedkeys) == 0:
   277             if self.path not in ledger.created:
   299             if self.path not in ledger.created:
   278                 util.unlinkpath(self.indexpath, ignoremissing=True)
   300                 util.unlinkpath(self.indexpath, ignoremissing=True)
   279                 util.unlinkpath(self.packpath, ignoremissing=True)
   301                 util.unlinkpath(self.packpath, ignoremissing=True)
   286         # Start at 1 to skip the header
   308         # Start at 1 to skip the header
   287         offset = 1
   309         offset = 1
   288         while offset < self.datasize:
   310         while offset < self.datasize:
   289             data = self._data
   311             data = self._data
   290             # <2 byte len> + <filename>
   312             # <2 byte len> + <filename>
   291             filenamelen = struct.unpack('!H', data[offset:offset +
   313             filenamelen = struct.unpack(
   292                                                    constants.FILENAMESIZE])[0]
   314                 '!H', data[offset : offset + constants.FILENAMESIZE]
       
   315             )[0]
   293             offset += constants.FILENAMESIZE
   316             offset += constants.FILENAMESIZE
   294             filename = data[offset:offset + filenamelen]
   317             filename = data[offset : offset + filenamelen]
   295             offset += filenamelen
   318             offset += filenamelen
   296 
   319 
   297             revcount = struct.unpack('!I', data[offset:offset +
   320             revcount = struct.unpack(
   298                                                 ENTRYCOUNTSIZE])[0]
   321                 '!I', data[offset : offset + ENTRYCOUNTSIZE]
       
   322             )[0]
   299             offset += ENTRYCOUNTSIZE
   323             offset += ENTRYCOUNTSIZE
   300 
   324 
   301             for i in pycompat.xrange(revcount):
   325             for i in pycompat.xrange(revcount):
   302                 entry = struct.unpack(PACKFORMAT, data[offset:offset +
   326                 entry = struct.unpack(
   303                                                               PACKENTRYLENGTH])
   327                     PACKFORMAT, data[offset : offset + PACKENTRYLENGTH]
       
   328                 )
   304                 offset += PACKENTRYLENGTH
   329                 offset += PACKENTRYLENGTH
   305 
   330 
   306                 copyfrom = data[offset:offset + entry[ANC_COPYFROM]]
   331                 copyfrom = data[offset : offset + entry[ANC_COPYFROM]]
   307                 offset += entry[ANC_COPYFROM]
   332                 offset += entry[ANC_COPYFROM]
   308 
   333 
   309                 yield (filename, entry[ANC_NODE], entry[ANC_P1NODE],
   334                 yield (
   310                         entry[ANC_P2NODE], entry[ANC_LINKNODE], copyfrom)
   335                     filename,
       
   336                     entry[ANC_NODE],
       
   337                     entry[ANC_P1NODE],
       
   338                     entry[ANC_P2NODE],
       
   339                     entry[ANC_LINKNODE],
       
   340                     copyfrom,
       
   341                 )
   311 
   342 
   312                 self._pagedin += PACKENTRYLENGTH
   343                 self._pagedin += PACKENTRYLENGTH
   313 
   344 
   314             # If we've read a lot of data from the mmap, free some memory.
   345             # If we've read a lot of data from the mmap, free some memory.
   315             self.freememory()
   346             self.freememory()
       
   347 
   316 
   348 
   317 class mutablehistorypack(basepack.mutablebasepack):
   349 class mutablehistorypack(basepack.mutablebasepack):
   318     """A class for constructing and serializing a histpack file and index.
   350     """A class for constructing and serializing a histpack file and index.
   319 
   351 
   320     A history pack is a pair of files that contain the revision history for
   352     A history pack is a pair of files that contain the revision history for
   387         nodeindexentry = <node: 20 byte> [1]
   419         nodeindexentry = <node: 20 byte> [1]
   388                          <pack file node offset: 8 byte unsigned int> [1]
   420                          <pack file node offset: 8 byte unsigned int> [1]
   389 
   421 
   390     [1]: new in version 1.
   422     [1]: new in version 1.
   391     """
   423     """
       
   424 
   392     INDEXSUFFIX = INDEXSUFFIX
   425     INDEXSUFFIX = INDEXSUFFIX
   393     PACKSUFFIX = PACKSUFFIX
   426     PACKSUFFIX = PACKSUFFIX
   394 
   427 
   395     SUPPORTED_VERSIONS = [2]
   428     SUPPORTED_VERSIONS = [2]
   396 
   429 
   407         self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH
   440         self.NODEINDEXENTRYLENGTH = NODEINDEXENTRYLENGTH
   408 
   441 
   409     def add(self, filename, node, p1, p2, linknode, copyfrom):
   442     def add(self, filename, node, p1, p2, linknode, copyfrom):
   410         copyfrom = copyfrom or ''
   443         copyfrom = copyfrom or ''
   411         copyfromlen = struct.pack('!H', len(copyfrom))
   444         copyfromlen = struct.pack('!H', len(copyfrom))
   412         self.fileentries.setdefault(filename, []).append((node, p1, p2,
   445         self.fileentries.setdefault(filename, []).append(
   413                                                           linknode,
   446             (node, p1, p2, linknode, copyfromlen, copyfrom)
   414                                                           copyfromlen,
   447         )
   415                                                           copyfrom))
       
   416 
   448 
   417     def _write(self):
   449     def _write(self):
   418         for filename in sorted(self.fileentries):
   450         for filename in sorted(self.fileentries):
   419             entries = self.fileentries[filename]
   451             entries = self.fileentries[filename]
   420             sectionstart = self.packfp.tell()
   452             sectionstart = self.packfp.tell()
   421 
   453 
   422             # Write the file section content
   454             # Write the file section content
   423             entrymap = dict((e[0], e) for e in entries)
   455             entrymap = dict((e[0], e) for e in entries)
       
   456 
   424             def parentfunc(node):
   457             def parentfunc(node):
   425                 x, p1, p2, x, x, x = entrymap[node]
   458                 x, p1, p2, x, x, x = entrymap[node]
   426                 parents = []
   459                 parents = []
   427                 if p1 != nullid:
   460                 if p1 != nullid:
   428                     parents.append(p1)
   461                     parents.append(p1)
   429                 if p2 != nullid:
   462                 if p2 != nullid:
   430                     parents.append(p2)
   463                     parents.append(p2)
   431                 return parents
   464                 return parents
   432 
   465 
   433             sortednodes = list(reversed(shallowutil.sortnodes(
   466             sortednodes = list(
   434                 (e[0] for e in entries),
   467                 reversed(
   435                 parentfunc)))
   468                     shallowutil.sortnodes((e[0] for e in entries), parentfunc)
       
   469                 )
       
   470             )
   436 
   471 
   437             # Write the file section header
   472             # Write the file section header
   438             self.writeraw("%s%s%s" % (
   473             self.writeraw(
   439                 struct.pack('!H', len(filename)),
   474                 "%s%s%s"
   440                 filename,
   475                 % (
   441                 struct.pack('!I', len(sortednodes)),
   476                     struct.pack('!H', len(filename)),
   442             ))
   477                     filename,
       
   478                     struct.pack('!I', len(sortednodes)),
       
   479                 )
       
   480             )
   443 
   481 
   444             sectionlen = constants.FILENAMESIZE + len(filename) + 4
   482             sectionlen = constants.FILENAMESIZE + len(filename) + 4
   445 
   483 
   446             rawstrings = []
   484             rawstrings = []
   447 
   485 
   476         fileindexformat = self.INDEXFORMAT
   514         fileindexformat = self.INDEXFORMAT
   477         fileindexlength = self.INDEXENTRYLENGTH
   515         fileindexlength = self.INDEXENTRYLENGTH
   478         nodeindexformat = self.NODEINDEXFORMAT
   516         nodeindexformat = self.NODEINDEXFORMAT
   479         nodeindexlength = self.NODEINDEXENTRYLENGTH
   517         nodeindexlength = self.NODEINDEXENTRYLENGTH
   480 
   518 
   481         files = ((hashlib.sha1(filename).digest(), filename, offset, size)
   519         files = (
   482                 for filename, (offset, size) in self.files.iteritems())
   520             (hashlib.sha1(filename).digest(), filename, offset, size)
       
   521             for filename, (offset, size) in self.files.iteritems()
       
   522         )
   483         files = sorted(files)
   523         files = sorted(files)
   484 
   524 
   485         # node index is after file index size, file index, and node index size
   525         # node index is after file index size, file index, and node index size
   486         indexlensize = struct.calcsize('!Q')
   526         indexlensize = struct.calcsize('!Q')
   487         nodeindexoffset = (indexoffset + indexlensize +
   527         nodeindexoffset = (
   488                            (len(files) * fileindexlength) + indexlensize)
   528             indexoffset
       
   529             + indexlensize
       
   530             + (len(files) * fileindexlength)
       
   531             + indexlensize
       
   532         )
   489 
   533 
   490         fileindexentries = []
   534         fileindexentries = []
   491         nodeindexentries = []
   535         nodeindexentries = []
   492         nodecount = 0
   536         nodecount = 0
   493         for namehash, filename, offset, size in files:
   537         for namehash, filename, offset, size in files:
   494             # File section index
   538             # File section index
   495             nodelocations = self.entrylocations[filename]
   539             nodelocations = self.entrylocations[filename]
   496 
   540 
   497             nodeindexsize = len(nodelocations) * nodeindexlength
   541             nodeindexsize = len(nodelocations) * nodeindexlength
   498 
   542 
   499             rawentry = struct.pack(fileindexformat, namehash, offset, size,
   543             rawentry = struct.pack(
   500                                    nodeindexoffset, nodeindexsize)
   544                 fileindexformat,
       
   545                 namehash,
       
   546                 offset,
       
   547                 size,
       
   548                 nodeindexoffset,
       
   549                 nodeindexsize,
       
   550             )
   501             # Node index
   551             # Node index
   502             nodeindexentries.append(struct.pack(constants.FILENAMESTRUCT,
   552             nodeindexentries.append(
   503                                                 len(filename)) + filename)
   553                 struct.pack(constants.FILENAMESTRUCT, len(filename)) + filename
       
   554             )
   504             nodeindexoffset += constants.FILENAMESIZE + len(filename)
   555             nodeindexoffset += constants.FILENAMESIZE + len(filename)
   505 
   556 
   506             for node, location in sorted(nodelocations.iteritems()):
   557             for node, location in sorted(nodelocations.iteritems()):
   507                 nodeindexentries.append(struct.pack(nodeindexformat, node,
   558                 nodeindexentries.append(
   508                                                     location))
   559                     struct.pack(nodeindexformat, node, location)
       
   560                 )
   509                 nodecount += 1
   561                 nodecount += 1
   510 
   562 
   511             nodeindexoffset += len(nodelocations) * nodeindexlength
   563             nodeindexoffset += len(nodelocations) * nodeindexlength
   512 
   564 
   513             fileindexentries.append(rawentry)
   565             fileindexentries.append(rawentry)
   514 
   566 
   515         nodecountraw = struct.pack('!Q', nodecount)
   567         nodecountraw = struct.pack('!Q', nodecount)
   516         return (''.join(fileindexentries) + nodecountraw +
   568         return (
   517                 ''.join(nodeindexentries))
   569             ''.join(fileindexentries) + nodecountraw + ''.join(nodeindexentries)
       
   570         )