hgext/remotefilelog/repack.py
changeset 43076 2372284d9457
parent 42943 5fadf6103790
child 43077 687b865b95ad
43075:57875cf423c9 43076:2372284d9457
    29     shallowutil,
    29     shallowutil,
    30 )
    30 )
    31 
    31 
    32 osutil = policy.importmod(r'osutil')
    32 osutil = policy.importmod(r'osutil')
    33 
    33 
       
    34 
    34 class RepackAlreadyRunning(error.Abort):
    35 class RepackAlreadyRunning(error.Abort):
    35     pass
    36     pass
    36 
    37 
    37 def backgroundrepack(repo, incremental=True, packsonly=False,
    38 
    38                      ensurestart=False):
    39 def backgroundrepack(
       
    40     repo, incremental=True, packsonly=False, ensurestart=False
       
    41 ):
    39     cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
    42     cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
    40     msg = _("(running background repack)\n")
    43     msg = _("(running background repack)\n")
    41     if incremental:
    44     if incremental:
    42         cmd.append('--incremental')
    45         cmd.append('--incremental')
    43         msg = _("(running background incremental repack)\n")
    46         msg = _("(running background incremental repack)\n")
    44     if packsonly:
    47     if packsonly:
    45         cmd.append('--packsonly')
    48         cmd.append('--packsonly')
    46     repo.ui.warn(msg)
    49     repo.ui.warn(msg)
    47     # We know this command will find a binary, so don't block on it starting.
    50     # We know this command will find a binary, so don't block on it starting.
    48     procutil.runbgcommand(cmd, encoding.environ, ensurestart=ensurestart)
    51     procutil.runbgcommand(cmd, encoding.environ, ensurestart=ensurestart)
    49 
    52 
       
    53 
    50 def fullrepack(repo, options=None):
    54 def fullrepack(repo, options=None):
    51     """If ``packsonly`` is True, stores creating only loose objects are skipped.
    55     """If ``packsonly`` is True, stores creating only loose objects are skipped.
    52     """
    56     """
    53     if util.safehasattr(repo, 'shareddatastores'):
    57     if util.safehasattr(repo, 'shareddatastores'):
    54         datasource = contentstore.unioncontentstore(
    58         datasource = contentstore.unioncontentstore(*repo.shareddatastores)
    55             *repo.shareddatastores)
       
    56         historysource = metadatastore.unionmetadatastore(
    59         historysource = metadatastore.unionmetadatastore(
    57             *repo.sharedhistorystores,
    60             *repo.sharedhistorystores, allowincomplete=True
    58             allowincomplete=True)
    61         )
    59 
    62 
    60         packpath = shallowutil.getcachepackpath(
    63         packpath = shallowutil.getcachepackpath(
       
    64             repo, constants.FILEPACK_CATEGORY
       
    65         )
       
    66         _runrepack(
    61             repo,
    67             repo,
    62             constants.FILEPACK_CATEGORY)
    68             datasource,
    63         _runrepack(repo, datasource, historysource, packpath,
    69             historysource,
    64                    constants.FILEPACK_CATEGORY, options=options)
    70             packpath,
       
    71             constants.FILEPACK_CATEGORY,
       
    72             options=options,
       
    73         )
    65 
    74 
    66     if util.safehasattr(repo.manifestlog, 'datastore'):
    75     if util.safehasattr(repo.manifestlog, 'datastore'):
    67         localdata, shareddata = _getmanifeststores(repo)
    76         localdata, shareddata = _getmanifeststores(repo)
    68         lpackpath, ldstores, lhstores = localdata
    77         lpackpath, ldstores, lhstores = localdata
    69         spackpath, sdstores, shstores = shareddata
    78         spackpath, sdstores, shstores = shareddata
    70 
    79 
    71         # Repack the shared manifest store
    80         # Repack the shared manifest store
    72         datasource = contentstore.unioncontentstore(*sdstores)
    81         datasource = contentstore.unioncontentstore(*sdstores)
    73         historysource = metadatastore.unionmetadatastore(
    82         historysource = metadatastore.unionmetadatastore(
    74                         *shstores,
    83             *shstores, allowincomplete=True
    75                         allowincomplete=True)
    84         )
    76         _runrepack(repo, datasource, historysource, spackpath,
    85         _runrepack(
    77                    constants.TREEPACK_CATEGORY, options=options)
    86             repo,
       
    87             datasource,
       
    88             historysource,
       
    89             spackpath,
       
    90             constants.TREEPACK_CATEGORY,
       
    91             options=options,
       
    92         )
    78 
    93 
    79         # Repack the local manifest store
    94         # Repack the local manifest store
    80         datasource = contentstore.unioncontentstore(
    95         datasource = contentstore.unioncontentstore(
    81                         *ldstores,
    96             *ldstores, allowincomplete=True
    82                         allowincomplete=True)
    97         )
    83         historysource = metadatastore.unionmetadatastore(
    98         historysource = metadatastore.unionmetadatastore(
    84                         *lhstores,
    99             *lhstores, allowincomplete=True
    85                         allowincomplete=True)
   100         )
    86         _runrepack(repo, datasource, historysource, lpackpath,
   101         _runrepack(
    87                    constants.TREEPACK_CATEGORY, options=options)
   102             repo,
       
   103             datasource,
       
   104             historysource,
       
   105             lpackpath,
       
   106             constants.TREEPACK_CATEGORY,
       
   107             options=options,
       
   108         )
       
   109 
    88 
   110 
    89 def incrementalrepack(repo, options=None):
   111 def incrementalrepack(repo, options=None):
    90     """This repacks the repo by looking at the distribution of pack files in the
   112     """This repacks the repo by looking at the distribution of pack files in the
    91     repo and performing the most minimal repack to keep the repo in good shape.
   113     repo and performing the most minimal repack to keep the repo in good shape.
    92     """
   114     """
    93     if util.safehasattr(repo, 'shareddatastores'):
   115     if util.safehasattr(repo, 'shareddatastores'):
    94         packpath = shallowutil.getcachepackpath(
   116         packpath = shallowutil.getcachepackpath(
       
   117             repo, constants.FILEPACK_CATEGORY
       
   118         )
       
   119         _incrementalrepack(
    95             repo,
   120             repo,
    96             constants.FILEPACK_CATEGORY)
   121             repo.shareddatastores,
    97         _incrementalrepack(repo,
   122             repo.sharedhistorystores,
    98                            repo.shareddatastores,
   123             packpath,
    99                            repo.sharedhistorystores,
   124             constants.FILEPACK_CATEGORY,
   100                            packpath,
   125             options=options,
   101                            constants.FILEPACK_CATEGORY,
   126         )
   102                            options=options)
       
   103 
   127 
   104     if util.safehasattr(repo.manifestlog, 'datastore'):
   128     if util.safehasattr(repo.manifestlog, 'datastore'):
   105         localdata, shareddata = _getmanifeststores(repo)
   129         localdata, shareddata = _getmanifeststores(repo)
   106         lpackpath, ldstores, lhstores = localdata
   130         lpackpath, ldstores, lhstores = localdata
   107         spackpath, sdstores, shstores = shareddata
   131         spackpath, sdstores, shstores = shareddata
   108 
   132 
   109         # Repack the shared manifest store
   133         # Repack the shared manifest store
   110         _incrementalrepack(repo,
   134         _incrementalrepack(
   111                            sdstores,
   135             repo,
   112                            shstores,
   136             sdstores,
   113                            spackpath,
   137             shstores,
   114                            constants.TREEPACK_CATEGORY,
   138             spackpath,
   115                            options=options)
   139             constants.TREEPACK_CATEGORY,
       
   140             options=options,
       
   141         )
   116 
   142 
   117         # Repack the local manifest store
   143         # Repack the local manifest store
   118         _incrementalrepack(repo,
   144         _incrementalrepack(
   119                            ldstores,
   145             repo,
   120                            lhstores,
   146             ldstores,
   121                            lpackpath,
   147             lhstores,
   122                            constants.TREEPACK_CATEGORY,
   148             lpackpath,
   123                            allowincompletedata=True,
   149             constants.TREEPACK_CATEGORY,
   124                            options=options)
   150             allowincompletedata=True,
       
   151             options=options,
       
   152         )
       
   153 
   125 
   154 
   126 def _getmanifeststores(repo):
   155 def _getmanifeststores(repo):
   127     shareddatastores = repo.manifestlog.shareddatastores
   156     shareddatastores = repo.manifestlog.shareddatastores
   128     localdatastores = repo.manifestlog.localdatastores
   157     localdatastores = repo.manifestlog.localdatastores
   129     sharedhistorystores = repo.manifestlog.sharedhistorystores
   158     sharedhistorystores = repo.manifestlog.sharedhistorystores
   130     localhistorystores = repo.manifestlog.localhistorystores
   159     localhistorystores = repo.manifestlog.localhistorystores
   131 
   160 
   132     sharedpackpath = shallowutil.getcachepackpath(repo,
   161     sharedpackpath = shallowutil.getcachepackpath(
   133                                             constants.TREEPACK_CATEGORY)
   162         repo, constants.TREEPACK_CATEGORY
   134     localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
   163     )
   135                                             constants.TREEPACK_CATEGORY)
   164     localpackpath = shallowutil.getlocalpackpath(
   136 
   165         repo.svfs.vfs.base, constants.TREEPACK_CATEGORY
   137     return ((localpackpath, localdatastores, localhistorystores),
   166     )
   138             (sharedpackpath, shareddatastores, sharedhistorystores))
   167 
       
   168     return (
       
   169         (localpackpath, localdatastores, localhistorystores),
       
   170         (sharedpackpath, shareddatastores, sharedhistorystores),
       
   171     )
       
   172 
   139 
   173 
   140 def _topacks(packpath, files, constructor):
   174 def _topacks(packpath, files, constructor):
   141     paths = list(os.path.join(packpath, p) for p in files)
   175     paths = list(os.path.join(packpath, p) for p in files)
   142     packs = list(constructor(p) for p in paths)
   176     packs = list(constructor(p) for p in paths)
   143     return packs
   177     return packs
   144 
   178 
       
   179 
   145 def _deletebigpacks(repo, folder, files):
   180 def _deletebigpacks(repo, folder, files):
   146     """Deletes packfiles that are bigger than ``packs.maxpacksize``.
   181     """Deletes packfiles that are bigger than ``packs.maxpacksize``.
   147 
   182 
    148     Returns ``files`` with the removed files omitted."""
    183     Returns ``files`` with the removed files omitted."""
   149     maxsize = repo.ui.configbytes("packs", "maxpacksize")
   184     maxsize = repo.ui.configbytes("packs", "maxpacksize")
   154     # historypacks.
   189     # historypacks.
   155     VALIDEXTS = [".datapack", ".dataidx"]
   190     VALIDEXTS = [".datapack", ".dataidx"]
   156 
   191 
   157     # Either an oversize index or datapack will trigger cleanup of the whole
   192     # Either an oversize index or datapack will trigger cleanup of the whole
   158     # pack:
   193     # pack:
   159     oversized = {os.path.splitext(path)[0] for path, ftype, stat in files
   194     oversized = {
   160         if (stat.st_size > maxsize and (os.path.splitext(path)[1]
   195         os.path.splitext(path)[0]
   161                                         in VALIDEXTS))}
   196         for path, ftype, stat in files
       
   197         if (stat.st_size > maxsize and (os.path.splitext(path)[1] in VALIDEXTS))
       
   198     }
   162 
   199 
   163     for rootfname in oversized:
   200     for rootfname in oversized:
   164         rootpath = os.path.join(folder, rootfname)
   201         rootpath = os.path.join(folder, rootfname)
   165         for ext in VALIDEXTS:
   202         for ext in VALIDEXTS:
   166             path = rootpath + ext
   203             path = rootpath + ext
   167             repo.ui.debug('removing oversize packfile %s (%s)\n' %
   204             repo.ui.debug(
   168                           (path, util.bytecount(os.stat(path).st_size)))
   205                 'removing oversize packfile %s (%s)\n'
       
   206                 % (path, util.bytecount(os.stat(path).st_size))
       
   207             )
   169             os.unlink(path)
   208             os.unlink(path)
   170     return [row for row in files if os.path.basename(row[0]) not in oversized]
   209     return [row for row in files if os.path.basename(row[0]) not in oversized]
   171 
   210 
   172 def _incrementalrepack(repo, datastore, historystore, packpath, category,
   211 
   173         allowincompletedata=False, options=None):
   212 def _incrementalrepack(
       
   213     repo,
       
   214     datastore,
       
   215     historystore,
       
   216     packpath,
       
   217     category,
       
   218     allowincompletedata=False,
       
   219     options=None,
       
   220 ):
   174     shallowutil.mkstickygroupdir(repo.ui, packpath)
   221     shallowutil.mkstickygroupdir(repo.ui, packpath)
   175 
   222 
   176     files = osutil.listdir(packpath, stat=True)
   223     files = osutil.listdir(packpath, stat=True)
   177     files = _deletebigpacks(repo, packpath, files)
   224     files = _deletebigpacks(repo, packpath, files)
   178     datapacks = _topacks(packpath,
   225     datapacks = _topacks(
   179         _computeincrementaldatapack(repo.ui, files),
   226         packpath, _computeincrementaldatapack(repo.ui, files), datapack.datapack
   180         datapack.datapack)
   227     )
   181     datapacks.extend(s for s in datastore
   228     datapacks.extend(
   182                      if not isinstance(s, datapack.datapackstore))
   229         s for s in datastore if not isinstance(s, datapack.datapackstore)
   183 
   230     )
   184     historypacks = _topacks(packpath,
   231 
       
   232     historypacks = _topacks(
       
   233         packpath,
   185         _computeincrementalhistorypack(repo.ui, files),
   234         _computeincrementalhistorypack(repo.ui, files),
   186         historypack.historypack)
   235         historypack.historypack,
   187     historypacks.extend(s for s in historystore
   236     )
   188                         if not isinstance(s, historypack.historypackstore))
   237     historypacks.extend(
       
   238         s
       
   239         for s in historystore
       
   240         if not isinstance(s, historypack.historypackstore)
       
   241     )
   189 
   242 
   190     # ``allhistory{files,packs}`` contains all known history packs, even ones we
   243     # ``allhistory{files,packs}`` contains all known history packs, even ones we
   191     # don't plan to repack. They are used during the datapack repack to ensure
   244     # don't plan to repack. They are used during the datapack repack to ensure
   192     # good ordering of nodes.
   245     # good ordering of nodes.
   193     allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
   246     allhistoryfiles = _allpackfileswithsuffix(
   194                             historypack.INDEXSUFFIX)
   247         files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
   195     allhistorypacks = _topacks(packpath,
   248     )
       
   249     allhistorypacks = _topacks(
       
   250         packpath,
   196         (f for f, mode, stat in allhistoryfiles),
   251         (f for f, mode, stat in allhistoryfiles),
   197         historypack.historypack)
   252         historypack.historypack,
   198     allhistorypacks.extend(s for s in historystore
   253     )
   199                         if not isinstance(s, historypack.historypackstore))
   254     allhistorypacks.extend(
   200     _runrepack(repo,
   255         s
   201                contentstore.unioncontentstore(
   256         for s in historystore
   202                    *datapacks,
   257         if not isinstance(s, historypack.historypackstore)
   203                    allowincomplete=allowincompletedata),
   258     )
   204                metadatastore.unionmetadatastore(
   259     _runrepack(
   205                    *historypacks,
   260         repo,
   206                    allowincomplete=True),
   261         contentstore.unioncontentstore(
   207                packpath, category,
   262             *datapacks, allowincomplete=allowincompletedata
   208                fullhistory=metadatastore.unionmetadatastore(
   263         ),
   209                    *allhistorypacks,
   264         metadatastore.unionmetadatastore(*historypacks, allowincomplete=True),
   210                    allowincomplete=True),
   265         packpath,
   211                 options=options)
   266         category,
       
   267         fullhistory=metadatastore.unionmetadatastore(
       
   268             *allhistorypacks, allowincomplete=True
       
   269         ),
       
   270         options=options,
       
   271     )
       
   272 
   212 
   273 
   213 def _computeincrementaldatapack(ui, files):
   274 def _computeincrementaldatapack(ui, files):
   214     opts = {
   275     opts = {
   215         'gencountlimit' : ui.configint(
   276         'gencountlimit': ui.configint('remotefilelog', 'data.gencountlimit'),
   216             'remotefilelog', 'data.gencountlimit'),
   277         'generations': ui.configlist('remotefilelog', 'data.generations'),
   217         'generations' : ui.configlist(
   278         'maxrepackpacks': ui.configint('remotefilelog', 'data.maxrepackpacks'),
   218             'remotefilelog', 'data.generations'),
   279         'repackmaxpacksize': ui.configbytes(
   219         'maxrepackpacks' : ui.configint(
   280             'remotefilelog', 'data.repackmaxpacksize'
   220             'remotefilelog', 'data.maxrepackpacks'),
   281         ),
   221         'repackmaxpacksize' : ui.configbytes(
   282         'repacksizelimit': ui.configbytes(
   222             'remotefilelog', 'data.repackmaxpacksize'),
   283             'remotefilelog', 'data.repacksizelimit'
   223         'repacksizelimit' : ui.configbytes(
   284         ),
   224             'remotefilelog', 'data.repacksizelimit'),
       
   225     }
   285     }
   226 
   286 
   227     packfiles = _allpackfileswithsuffix(
   287     packfiles = _allpackfileswithsuffix(
   228         files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
   288         files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX
       
   289     )
   229     return _computeincrementalpack(packfiles, opts)
   290     return _computeincrementalpack(packfiles, opts)
       
   291 
   230 
   292 
   231 def _computeincrementalhistorypack(ui, files):
   293 def _computeincrementalhistorypack(ui, files):
   232     opts = {
   294     opts = {
   233         'gencountlimit' : ui.configint(
   295         'gencountlimit': ui.configint('remotefilelog', 'history.gencountlimit'),
   234             'remotefilelog', 'history.gencountlimit'),
   296         'generations': ui.configlist(
   235         'generations' : ui.configlist(
   297             'remotefilelog', 'history.generations', ['100MB']
   236             'remotefilelog', 'history.generations', ['100MB']),
   298         ),
   237         'maxrepackpacks' : ui.configint(
   299         'maxrepackpacks': ui.configint(
   238             'remotefilelog', 'history.maxrepackpacks'),
   300             'remotefilelog', 'history.maxrepackpacks'
   239         'repackmaxpacksize' : ui.configbytes(
   301         ),
   240             'remotefilelog', 'history.repackmaxpacksize', '400MB'),
   302         'repackmaxpacksize': ui.configbytes(
   241         'repacksizelimit' : ui.configbytes(
   303             'remotefilelog', 'history.repackmaxpacksize', '400MB'
   242             'remotefilelog', 'history.repacksizelimit'),
   304         ),
       
   305         'repacksizelimit': ui.configbytes(
       
   306             'remotefilelog', 'history.repacksizelimit'
       
   307         ),
   243     }
   308     }
   244 
   309 
   245     packfiles = _allpackfileswithsuffix(
   310     packfiles = _allpackfileswithsuffix(
   246         files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
   311         files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
       
   312     )
   247     return _computeincrementalpack(packfiles, opts)
   313     return _computeincrementalpack(packfiles, opts)
       
   314 
   248 
   315 
   249 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
   316 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
   250     result = []
   317     result = []
   251     fileset = set(fn for fn, mode, stat in files)
   318     fileset = set(fn for fn, mode, stat in files)
   252     for filename, mode, stat in files:
   319     for filename, mode, stat in files:
   253         if not filename.endswith(packsuffix):
   320         if not filename.endswith(packsuffix):
   254             continue
   321             continue
   255 
   322 
   256         prefix = filename[:-len(packsuffix)]
   323         prefix = filename[: -len(packsuffix)]
   257 
   324 
   258         # Don't process a pack if it doesn't have an index.
   325         # Don't process a pack if it doesn't have an index.
   259         if (prefix + indexsuffix) not in fileset:
   326         if (prefix + indexsuffix) not in fileset:
   260             continue
   327             continue
   261         result.append((prefix, mode, stat))
   328         result.append((prefix, mode, stat))
   262 
   329 
   263     return result
   330     return result
   264 
   331 
       
   332 
   265 def _computeincrementalpack(files, opts):
   333 def _computeincrementalpack(files, opts):
   266     """Given a set of pack files along with the configuration options, this
   334     """Given a set of pack files along with the configuration options, this
   267     function computes the list of files that should be packed as part of an
   335     function computes the list of files that should be packed as part of an
   268     incremental repack.
   336     incremental repack.
   269 
   337 
   270     It tries to strike a balance between keeping incremental repacks cheap (i.e.
   338     It tries to strike a balance between keeping incremental repacks cheap (i.e.
   271     packing small things when possible, and rolling the packs up to the big ones
   339     packing small things when possible, and rolling the packs up to the big ones
   272     over time).
   340     over time).
   273     """
   341     """
   274 
   342 
   275     limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
   343     limits = list(
   276                                 reverse=True))
   344         sorted((util.sizetoint(s) for s in opts['generations']), reverse=True)
       
   345     )
   277     limits.append(0)
   346     limits.append(0)
   278 
   347 
   279     # Group the packs by generation (i.e. by size)
   348     # Group the packs by generation (i.e. by size)
   280     generations = []
   349     generations = []
   281     for i in pycompat.xrange(len(limits)):
   350     for i in pycompat.xrange(len(limits)):
   301     # Find the largest generation with more than gencountlimit packs
   370     # Find the largest generation with more than gencountlimit packs
   302     genpacks = []
   371     genpacks = []
   303     for i, limit in enumerate(limits):
   372     for i, limit in enumerate(limits):
   304         if len(generations[i]) > opts['gencountlimit']:
   373         if len(generations[i]) > opts['gencountlimit']:
   305             # Sort to be smallest last, for easy popping later
   374             # Sort to be smallest last, for easy popping later
   306             genpacks.extend(sorted(generations[i], reverse=True,
   375             genpacks.extend(
   307                                    key=lambda x: sizes[x]))
   376                 sorted(generations[i], reverse=True, key=lambda x: sizes[x])
       
   377             )
   308             break
   378             break
   309 
   379 
   310     # Take as many packs from the generation as we can
   380     # Take as many packs from the generation as we can
   311     chosenpacks = genpacks[-3:]
   381     chosenpacks = genpacks[-3:]
   312     genpacks = genpacks[:-3]
   382     genpacks = genpacks[:-3]
   313     repacksize = sum(sizes[n] for n in chosenpacks)
   383     repacksize = sum(sizes[n] for n in chosenpacks)
   314     while (repacksize < opts['repacksizelimit'] and genpacks and
   384     while (
   315            len(chosenpacks) < opts['maxrepackpacks']):
   385         repacksize < opts['repacksizelimit']
       
   386         and genpacks
       
   387         and len(chosenpacks) < opts['maxrepackpacks']
       
   388     ):
   316         chosenpacks.append(genpacks.pop())
   389         chosenpacks.append(genpacks.pop())
   317         repacksize += sizes[chosenpacks[-1]]
   390         repacksize += sizes[chosenpacks[-1]]
   318 
   391 
   319     return chosenpacks
   392     return chosenpacks
   320 
   393 
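For illustration, a minimal, self-contained sketch of the generation bucketing that _computeincrementalpack performs: packs are assigned to the first (largest) size threshold they exceed, mirroring how the descending limits list above is built from the 'generations' config. The helper name, the exact comparison, and the example sizes are assumptions for illustration, since the grouping loop itself falls outside the hunk shown here.

def bucket_by_generation(sizes, limits):
    # sizes: mapping of pack name -> size in bytes
    # limits: size thresholds sorted largest first, ending with 0
    generations = [[] for _ in limits]
    for name, size in sizes.items():
        for i, limit in enumerate(limits):
            if size > limit:
                # pack belongs to the first generation whose limit it clears
                generations[i].append(name)
                break
    return generations

# Example: with thresholds of 1 GB and 100 MB (plus the trailing 0),
# a 300 MB pack lands in the middle generation and a 5 MB pack in the
# smallest one, so only the small generation would be repacked first.
print(bucket_by_generation(
    {'big.datapack': 300 * 2**20, 'small.datapack': 5 * 2**20},
    [2**30, 100 * 2**20, 0],
))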
   321 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
   394 
   322                options=None):
   395 def _runrepack(
       
   396     repo, data, history, packpath, category, fullhistory=None, options=None
       
   397 ):
   323     shallowutil.mkstickygroupdir(repo.ui, packpath)
   398     shallowutil.mkstickygroupdir(repo.ui, packpath)
   324 
   399 
   325     def isold(repo, filename, node):
   400     def isold(repo, filename, node):
   326         """Check if the file node is older than a limit.
   401         """Check if the file node is older than a limit.
   327         Unless a limit is specified in the config the default limit is taken.
   402         Unless a limit is specified in the config the default limit is taken.
   335         return filetime[0] < limit
   410         return filetime[0] < limit
   336 
   411 
   337     garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
   412     garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
   338     if not fullhistory:
   413     if not fullhistory:
   339         fullhistory = history
   414         fullhistory = history
   340     packer = repacker(repo, data, history, fullhistory, category,
   415     packer = repacker(
   341                       gc=garbagecollect, isold=isold, options=options)
   416         repo,
       
   417         data,
       
   418         history,
       
   419         fullhistory,
       
   420         category,
       
   421         gc=garbagecollect,
       
   422         isold=isold,
       
   423         options=options,
       
   424     )
   342 
   425 
   343     with datapack.mutabledatapack(repo.ui, packpath) as dpack:
   426     with datapack.mutabledatapack(repo.ui, packpath) as dpack:
   344         with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
   427         with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
   345             try:
   428             try:
   346                 packer.run(dpack, hpack)
   429                 packer.run(dpack, hpack)
   347             except error.LockHeld:
   430             except error.LockHeld:
   348                 raise RepackAlreadyRunning(_("skipping repack - another repack "
   431                 raise RepackAlreadyRunning(
   349                                              "is already running"))
   432                     _("skipping repack - another repack " "is already running")
       
   433                 )
       
   434 
   350 
   435 
   351 def keepset(repo, keyfn, lastkeepkeys=None):
   436 def keepset(repo, keyfn, lastkeepkeys=None):
   352     """Computes a keepset which is not garbage collected.
   437     """Computes a keepset which is not garbage collected.
   353     'keyfn' is a function that maps filename, node to a unique key.
   438     'keyfn' is a function that maps filename, node to a unique key.
   354     'lastkeepkeys' is an optional argument and if provided the keepset
   439     'lastkeepkeys' is an optional argument and if provided the keepset
   407             for filename, filenode in m.iteritems():
   492             for filename, filenode in m.iteritems():
   408                 keepkeys.add(keyfn(filename, filenode))
   493                 keepkeys.add(keyfn(filename, filenode))
   409 
   494 
   410     return keepkeys
   495     return keepkeys
   411 
   496 
       
   497 
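The keyfn contract described in the keepset docstring above is small enough to show directly. The sketch below is hypothetical: it mirrors the (filename, node) key the repacker passes in when garbage-collecting repacks are enabled, and shows how a keepset built from such keys would be consulted.

def example_keyfn(filename, node):
    # identify a revision by its (filename, node) pair, as the repacker does
    return (filename, node)

def is_protected(keepkeys, filename, node):
    # a revision survives garbage collection if its key is in the keepset
    return example_keyfn(filename, node) in keepkeys

keep = {('foo.txt', b'\x01' * 20)}
print(is_protected(keep, 'foo.txt', b'\x01' * 20))   # True
print(is_protected(keep, 'bar.txt', b'\x02' * 20))   # False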
   412 class repacker(object):
   498 class repacker(object):
   413     """Class for orchestrating the repack of data and history information into a
   499     """Class for orchestrating the repack of data and history information into a
   414     new format.
   500     new format.
   415     """
   501     """
   416     def __init__(self, repo, data, history, fullhistory, category, gc=False,
   502 
   417                  isold=None, options=None):
   503     def __init__(
       
   504         self,
       
   505         repo,
       
   506         data,
       
   507         history,
       
   508         fullhistory,
       
   509         category,
       
   510         gc=False,
       
   511         isold=None,
       
   512         options=None,
       
   513     ):
   418         self.repo = repo
   514         self.repo = repo
   419         self.data = data
   515         self.data = data
   420         self.history = history
   516         self.history = history
   421         self.fullhistory = fullhistory
   517         self.fullhistory = fullhistory
   422         self.unit = constants.getunits(category)
   518         self.unit = constants.getunits(category)
   424         self.options = options
   520         self.options = options
   425         if self.garbagecollect:
   521         if self.garbagecollect:
   426             if not isold:
   522             if not isold:
   427                 raise ValueError("Function 'isold' is not properly specified")
   523                 raise ValueError("Function 'isold' is not properly specified")
   428             # use (filename, node) tuple as a keepset key
   524             # use (filename, node) tuple as a keepset key
   429             self.keepkeys = keepset(repo, lambda f, n : (f, n))
   525             self.keepkeys = keepset(repo, lambda f, n: (f, n))
   430             self.isold = isold
   526             self.isold = isold
   431 
   527 
   432     def run(self, targetdata, targethistory):
   528     def run(self, targetdata, targethistory):
   433         ledger = repackledger()
   529         ledger = repackledger()
   434 
   530 
   435         with lockmod.lock(repacklockvfs(self.repo), "repacklock", desc=None,
   531         with lockmod.lock(
   436                           timeout=0):
   532             repacklockvfs(self.repo), "repacklock", desc=None, timeout=0
       
   533         ):
   437             self.repo.hook('prerepack')
   534             self.repo.hook('prerepack')
   438 
   535 
   439             # Populate ledger from source
   536             # Populate ledger from source
   440             self.data.markledger(ledger, options=self.options)
   537             self.data.markledger(ledger, options=self.options)
   441             self.history.markledger(ledger, options=self.options)
   538             self.history.markledger(ledger, options=self.options)
   471         # likely to be the newest version assuming files grow over time.
   568         # likely to be the newest version assuming files grow over time.
   472         # (Sort by node first to ensure the sort is stable.)
   569         # (Sort by node first to ensure the sort is stable.)
   473         orphans = sorted(orphans)
   570         orphans = sorted(orphans)
   474         orphans = list(sorted(orphans, key=getsize, reverse=True))
   571         orphans = list(sorted(orphans, key=getsize, reverse=True))
   475         if ui.debugflag:
   572         if ui.debugflag:
   476             ui.debug("%s: orphan chain: %s\n" % (filename,
   573             ui.debug(
   477                 ", ".join([short(s) for s in orphans])))
   574                 "%s: orphan chain: %s\n"
       
   575                 % (filename, ", ".join([short(s) for s in orphans]))
       
   576             )
   478 
   577 
   479         # Create one contiguous chain and reassign deltabases.
   578         # Create one contiguous chain and reassign deltabases.
   480         for i, node in enumerate(orphans):
   579         for i, node in enumerate(orphans):
   481             if i == 0:
   580             if i == 0:
   482                 deltabases[node] = (nullid, 0)
   581                 deltabases[node] = (nullid, 0)
   495         for entry in ledger.entries.itervalues():
   594         for entry in ledger.entries.itervalues():
   496             if entry.datasource:
   595             if entry.datasource:
   497                 byfile.setdefault(entry.filename, {})[entry.node] = entry
   596                 byfile.setdefault(entry.filename, {})[entry.node] = entry
   498 
   597 
   499         count = 0
   598         count = 0
   500         repackprogress = ui.makeprogress(_("repacking data"), unit=self.unit,
   599         repackprogress = ui.makeprogress(
   501                                             total=len(byfile))
   600             _("repacking data"), unit=self.unit, total=len(byfile)
       
   601         )
   502         for filename, entries in sorted(byfile.iteritems()):
   602         for filename, entries in sorted(byfile.iteritems()):
   503             repackprogress.update(count)
   603             repackprogress.update(count)
   504 
   604 
   505             ancestors = {}
   605             ancestors = {}
   506             nodes = list(node for node in entries)
   606             nodes = list(node for node in entries)
   507             nohistory = []
   607             nohistory = []
   508             buildprogress = ui.makeprogress(_("building history"), unit='nodes',
   608             buildprogress = ui.makeprogress(
   509                                             total=len(nodes))
   609                 _("building history"), unit='nodes', total=len(nodes)
       
   610             )
   510             for i, node in enumerate(nodes):
   611             for i, node in enumerate(nodes):
   511                 if node in ancestors:
   612                 if node in ancestors:
   512                     continue
   613                     continue
   513                 buildprogress.update(i)
   614                 buildprogress.update(i)
   514                 try:
   615                 try:
   515                     ancestors.update(self.fullhistory.getancestors(filename,
   616                     ancestors.update(
   516                         node, known=ancestors))
   617                         self.fullhistory.getancestors(
       
   618                             filename, node, known=ancestors
       
   619                         )
       
   620                     )
   517                 except KeyError:
   621                 except KeyError:
   518                     # Since we're packing data entries, we may not have the
   622                     # Since we're packing data entries, we may not have the
   519                     # corresponding history entries for them. It's not a big
   623                     # corresponding history entries for them. It's not a big
   520                     # deal, but the entries won't be delta'd perfectly.
   624                     # deal, but the entries won't be delta'd perfectly.
   521                     nohistory.append(node)
   625                     nohistory.append(node)
   522             buildprogress.complete()
   626             buildprogress.complete()
   523 
   627 
   524             # Order the nodes children first, so we can produce reverse deltas
   628             # Order the nodes children first, so we can produce reverse deltas
   525             orderednodes = list(reversed(self._toposort(ancestors)))
   629             orderednodes = list(reversed(self._toposort(ancestors)))
   526             if len(nohistory) > 0:
   630             if len(nohistory) > 0:
   527                 ui.debug('repackdata: %d nodes without history\n' %
   631                 ui.debug(
   528                          len(nohistory))
   632                     'repackdata: %d nodes without history\n' % len(nohistory)
       
   633                 )
   529             orderednodes.extend(sorted(nohistory))
   634             orderednodes.extend(sorted(nohistory))
   530 
   635 
   531             # Filter orderednodes to just the nodes we want to serialize (it
   636             # Filter orderednodes to just the nodes we want to serialize (it
   532             # currently also has the edge nodes' ancestors).
   637             # currently also has the edge nodes' ancestors).
   533             orderednodes = list(filter(lambda node: node in nodes,
   638             orderednodes = list(
   534                                 orderednodes))
   639                 filter(lambda node: node in nodes, orderednodes)
       
   640             )
   535 
   641 
   536             # Garbage collect old nodes:
   642             # Garbage collect old nodes:
   537             if self.garbagecollect:
   643             if self.garbagecollect:
   538                 neworderednodes = []
   644                 neworderednodes = []
   539                 for node in orderednodes:
   645                 for node in orderednodes:
   540                     # If the node is old and is not in the keepset, we skip it,
   646                     # If the node is old and is not in the keepset, we skip it,
   541                     # and mark as garbage collected
   647                     # and mark as garbage collected
   542                     if ((filename, node) not in self.keepkeys and
   648                     if (filename, node) not in self.keepkeys and self.isold(
   543                         self.isold(self.repo, filename, node)):
   649                         self.repo, filename, node
       
   650                     ):
   544                         entries[node].gced = True
   651                         entries[node].gced = True
   545                         continue
   652                         continue
   546                     neworderednodes.append(node)
   653                     neworderednodes.append(node)
   547                 orderednodes = neworderednodes
   654                 orderednodes = neworderednodes
   548 
   655 
   549             # Compute delta bases for nodes:
   656             # Compute delta bases for nodes:
   550             deltabases = {}
   657             deltabases = {}
   551             nobase = set()
   658             nobase = set()
   552             referenced = set()
   659             referenced = set()
   553             nodes = set(nodes)
   660             nodes = set(nodes)
   554             processprogress = ui.makeprogress(_("processing nodes"),
   661             processprogress = ui.makeprogress(
   555                                               unit='nodes',
   662                 _("processing nodes"), unit='nodes', total=len(orderednodes)
   556                                               total=len(orderednodes))
   663             )
   557             for i, node in enumerate(orderednodes):
   664             for i, node in enumerate(orderednodes):
   558                 processprogress.update(i)
   665                 processprogress.update(i)
   559                 # Find delta base
   666                 # Find delta base
   560                 # TODO: allow delta'ing against most recent descendant instead
   667                 # TODO: allow delta'ing against most recent descendant instead
   561                 # of immediate child
   668                 # of immediate child
   591                             deltabases[p2] = (node, chainlen + 1)
   698                             deltabases[p2] = (node, chainlen + 1)
   592 
   699 
   593             # experimental config: repack.chainorphansbysize
   700             # experimental config: repack.chainorphansbysize
   594             if ui.configbool('repack', 'chainorphansbysize'):
   701             if ui.configbool('repack', 'chainorphansbysize'):
   595                 orphans = nobase - referenced
   702                 orphans = nobase - referenced
   596                 orderednodes = self._chainorphans(ui, filename, orderednodes,
   703                 orderednodes = self._chainorphans(
   597                     orphans, deltabases)
   704                     ui, filename, orderednodes, orphans, deltabases
       
   705                 )
   598 
   706 
   599             # Compute deltas and write to the pack
   707             # Compute deltas and write to the pack
   600             for i, node in enumerate(orderednodes):
   708             for i, node in enumerate(orderednodes):
   601                 deltabase, chainlen = deltabases[node]
   709                 deltabase, chainlen = deltabases[node]
   602                 # Compute delta
   710                 # Compute delta
   605                 # be fetching the same deltachain over and over again.
   713                 # be fetching the same deltachain over and over again.
   606                 if deltabase != nullid:
   714                 if deltabase != nullid:
   607                     deltaentry = self.data.getdelta(filename, node)
   715                     deltaentry = self.data.getdelta(filename, node)
   608                     delta, deltabasename, origdeltabase, meta = deltaentry
   716                     delta, deltabasename, origdeltabase, meta = deltaentry
   609                     size = meta.get(constants.METAKEYSIZE)
   717                     size = meta.get(constants.METAKEYSIZE)
   610                     if (deltabasename != filename or origdeltabase != deltabase
   718                     if (
   611                         or size is None):
   719                         deltabasename != filename
       
   720                         or origdeltabase != deltabase
       
   721                         or size is None
       
   722                     ):
   612                         deltabasetext = self.data.get(filename, deltabase)
   723                         deltabasetext = self.data.get(filename, deltabase)
   613                         original = self.data.get(filename, node)
   724                         original = self.data.get(filename, node)
   614                         size = len(original)
   725                         size = len(original)
   615                         delta = mdiff.textdiff(deltabasetext, original)
   726                         delta = mdiff.textdiff(deltabasetext, original)
   616                 else:
   727                 else:
   637         byfile = {}
   748         byfile = {}
   638         for entry in ledger.entries.itervalues():
   749         for entry in ledger.entries.itervalues():
   639             if entry.historysource:
   750             if entry.historysource:
   640                 byfile.setdefault(entry.filename, {})[entry.node] = entry
   751                 byfile.setdefault(entry.filename, {})[entry.node] = entry
   641 
   752 
   642         progress = ui.makeprogress(_("repacking history"), unit=self.unit,
   753         progress = ui.makeprogress(
   643                                    total=len(byfile))
   754             _("repacking history"), unit=self.unit, total=len(byfile)
       
   755         )
   644         for filename, entries in sorted(byfile.iteritems()):
   756         for filename, entries in sorted(byfile.iteritems()):
   645             ancestors = {}
   757             ancestors = {}
   646             nodes = list(node for node in entries)
   758             nodes = list(node for node in entries)
   647 
   759 
   648             for node in nodes:
   760             for node in nodes:
   649                 if node in ancestors:
   761                 if node in ancestors:
   650                     continue
   762                     continue
   651                 ancestors.update(self.history.getancestors(filename, node,
   763                 ancestors.update(
   652                                                            known=ancestors))
   764                     self.history.getancestors(filename, node, known=ancestors)
       
   765                 )
   653 
   766 
   654             # Order the nodes children first
   767             # Order the nodes children first
   655             orderednodes = reversed(self._toposort(ancestors))
   768             orderednodes = reversed(self._toposort(ancestors))
   656 
   769 
   657             # Write to the pack
   770             # Write to the pack
   700             return parents
   813             return parents
   701 
   814 
   702         sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
   815         sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
   703         return sortednodes
   816         return sortednodes
   704 
   817 
       
   818 
   705 class repackledger(object):
   819 class repackledger(object):
   706     """Storage for all the bookkeeping that happens during a repack. It contains
   820     """Storage for all the bookkeeping that happens during a repack. It contains
   707     the list of revisions being repacked, what happened to each revision, and
   821     the list of revisions being repacked, what happened to each revision, and
   708     which source store contained which revision originally (for later cleanup).
   822     which source store contained which revision originally (for later cleanup).
   709     """
   823     """
       
   824 
   710     def __init__(self):
   825     def __init__(self):
   711         self.entries = {}
   826         self.entries = {}
   712         self.sources = {}
   827         self.sources = {}
   713         self.created = set()
   828         self.created = set()
   714 
   829 
   746         return value
   861         return value
   747 
   862 
   748     def addcreated(self, value):
   863     def addcreated(self, value):
   749         self.created.add(value)
   864         self.created.add(value)
   750 
   865 
       
   866 
   751 class repackentry(object):
   867 class repackentry(object):
   752     """Simple class representing a single revision entry in the repackledger.
   868     """Simple class representing a single revision entry in the repackledger.
   753     """
   869     """
   754     __slots__ = (r'filename', r'node', r'datasource', r'historysource',
   870 
   755                  r'datarepacked', r'historyrepacked', r'gced')
   871     __slots__ = (
       
   872         r'filename',
       
   873         r'node',
       
   874         r'datasource',
       
   875         r'historysource',
       
   876         r'datarepacked',
       
   877         r'historyrepacked',
       
   878         r'gced',
       
   879     )
       
   880 
   756     def __init__(self, filename, node):
   881     def __init__(self, filename, node):
   757         self.filename = filename
   882         self.filename = filename
   758         self.node = node
   883         self.node = node
   759         # If the revision has a data entry in the source
   884         # If the revision has a data entry in the source
   760         self.datasource = False
   885         self.datasource = False
   765         # If the revision's history entry was repacked into the repack target
   890         # If the revision's history entry was repacked into the repack target
   766         self.historyrepacked = False
   891         self.historyrepacked = False
   767         # If garbage collected
   892         # If garbage collected
   768         self.gced = False
   893         self.gced = False
   769 
   894 
       
   895 
   770 def repacklockvfs(repo):
   896 def repacklockvfs(repo):
   771     if util.safehasattr(repo, 'name'):
   897     if util.safehasattr(repo, 'name'):
   772         # Lock in the shared cache so repacks across multiple copies of the same
   898         # Lock in the shared cache so repacks across multiple copies of the same
   773         # repo are coordinated.
   899         # repo are coordinated.
   774         sharedcachepath = shallowutil.getcachepackpath(
   900         sharedcachepath = shallowutil.getcachepackpath(
   775             repo,
   901             repo, constants.FILEPACK_CATEGORY
   776             constants.FILEPACK_CATEGORY)
   902         )
   777         return vfs.vfs(sharedcachepath)
   903         return vfs.vfs(sharedcachepath)
   778     else:
   904     else:
   779         return repo.svfs
   905         return repo.svfs