hgext/largefiles/basestore.py
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
Sat, 08 Nov 2014 00:48:41 +0900
changeset 23276 4be754832829
parent 19950 cce7ab960312
child 23941 164bd5218ddb
permissions -rw-r--r--
largefiles: move "copyalltostore" invocation into "markcommitted" Before this patch, while "hg convert", largefiles avoids copying largefiles in the working directory into the store area by combination of setting "repo._isconverting" in "mercurialsink{before|after}" and checking it in "copytostoreabsolute". This avoiding is needed while "hg convert", because converting doesn't update largefiles in the working directory. But this implementation is not efficient, because: - invocation in "markcommitted" can easily ensure updating largefiles in the working directory "markcommitted" is invoked only when new revision is committed via "commit" of "localrepository" (= with files in the working directory). On the other hand, "commitctx" may be invoked directly for in-memory committing. - committing without updating the working directory (e.g. "import --bypass") also needs this kind of avoiding For efficiency of this kind of avoiding, this patch does: - move "copyalltostore" invocation into "markcommitted" - remove meaningless procedures below: - hooking "mercurialsink{before|after}" to (un)set "repo._isconverting" - checking "repo._isconverting" in "copytostoreabsolute" This patch invokes "copyalltostore" also in "_commitcontext", because "_commitcontext" expects that largefiles in the working directory are copied into store area after "commitctx". In this case, the working directory is used as a kind of temporary area to write largefiles out, even though converted revisions are committed via "commitctx" (without updating normal files).

# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''base class for store implementations and store-related utility code'''

import re

from mercurial import util, node, hg
from mercurial.i18n import _

import lfutil

class StoreError(Exception):
    '''Raised when there is a problem getting files from or putting
    files to a central store.'''
    def __init__(self, filename, hash, url, detail):
        self.filename = filename
        self.hash = hash
        self.url = url
        self.detail = detail

    def longmessage(self):
        return (_("error getting id %s from url %s for file %s: %s\n") %
                 (self.hash, util.hidepassword(self.url), self.filename,
                  self.detail))

    def __str__(self):
        return "%s: %s" % (util.hidepassword(self.url), self.detail)

class basestore(object):
    def __init__(self, ui, repo, url):
        self.ui = ui
        self.repo = repo
        self.url = url

    def put(self, source, hash):
        '''Put source file into the store so it can be retrieved by hash.'''
        raise NotImplementedError('abstract method')

    def exists(self, hashes):
        '''Check to see if the store contains the given hashes. Given an
        iterable of hashes it returns a mapping from hash to bool.'''
        raise NotImplementedError('abstract method')

    def get(self, files):
        '''Get the specified largefiles from the store and write to local
        files under repo.root.  files is a list of (filename, hash)
        tuples.  Return (success, missing), lists of files successfully
        downloaded and those not found in the store.  success is a list
        of (filename, hash) tuples; missing is a list of filenames that
        we could not get.  (The detailed error message will already have
        been presented to the user, so missing is just supplied as a
        summary.)'''
        success = []
        missing = []
        ui = self.ui

        at = 0
        available = self.exists(set(hash for (_filename, hash) in files))
        for filename, hash in files:
            ui.progress(_('getting largefiles'), at, unit='lfile',
                total=len(files))
            at += 1
            ui.note(_('getting %s:%s\n') % (filename, hash))

            if not available.get(hash):
                ui.warn(_('%s: largefile %s not available from %s\n')
                        % (filename, hash, util.hidepassword(self.url)))
                missing.append(filename)
                continue

            if self._gethash(filename, hash):
                success.append((filename, hash))
            else:
                missing.append(filename)

        ui.progress(_('getting largefiles'), None)
        return (success, missing)

    def _gethash(self, filename, hash):
        """Get file with the provided hash and store it in the local repo's
        store and in the usercache.
        filename is for informational messages only.
        """
        util.makedirs(lfutil.storepath(self.repo, ''))
        storefilename = lfutil.storepath(self.repo, hash)

        tmpname = storefilename + '.tmp'
        tmpfile = util.atomictempfile(tmpname,
                                      createmode=self.repo.store.createmode)

        try:
            gothash = self._getfile(tmpfile, filename, hash)
        except StoreError, err:
            self.ui.warn(err.longmessage())
            gothash = ""
        tmpfile.close()

        if gothash != hash:
            if gothash != "":
                self.ui.warn(_('%s: data corruption (expected %s, got %s)\n')
                             % (filename, hash, gothash))
            util.unlink(tmpname)
            return False

        util.rename(tmpname, storefilename)
        lfutil.linktousercache(self.repo, hash)
        return True

    def verify(self, revs, contents=False):
        '''Verify the existence (and, optionally, contents) of every big
        file revision referenced by every changeset in revs.
        Return 0 if all is well, non-zero on any errors.'''
        failed = False

        self.ui.status(_('searching %d changesets for largefiles\n') %
                       len(revs))
        verified = set()                # set of (filename, filenode) tuples

        for rev in revs:
            cctx = self.repo[rev]
            cset = "%d:%s" % (cctx.rev(), node.short(cctx.node()))

            for standin in cctx:
                if self._verifyfile(cctx, cset, contents, standin, verified):
                    failed = True

        numrevs = len(verified)
        numlfiles = len(set([fname for (fname, fnode) in verified]))
        if contents:
            self.ui.status(
                _('verified contents of %d revisions of %d largefiles\n')
                % (numrevs, numlfiles))
        else:
            self.ui.status(
                _('verified existence of %d revisions of %d largefiles\n')
                % (numrevs, numlfiles))
        return int(failed)

    def _getfile(self, tmpfile, filename, hash):
        '''Fetch one revision of one file from the store and write it
        to tmpfile.  Compute the hash of the file on-the-fly as it
        downloads and return the hash.  Close tmpfile.  Raise
        StoreError if unable to download the file (e.g. it does not
        exist in the store).'''
        raise NotImplementedError('abstract method')

    def _verifyfile(self, cctx, cset, contents, standin, verified):
        '''Perform the actual verification of a file in the store.
        'cset' is only used in warnings.
        'contents' controls verification of content hash.
        'standin' is the standin path of the largefile to verify.
        'verified' is maintained as a set of already verified files.
        Returns _true_ if it is a standin and any problems are found!
        '''
        raise NotImplementedError('abstract method')

import localstore, wirestore

_storeprovider = {
    'file':  [localstore.localstore],
    'http':  [wirestore.wirestore],
    'https': [wirestore.wirestore],
    'ssh': [wirestore.wirestore],
    }

_scheme_re = re.compile(r'^([a-zA-Z0-9+-.]+)://')

# During clone this function is passed the src's ui object
# but it needs the dest's ui object so it can read out of
# the config file. Use repo.ui instead.
def _openstore(repo, remote=None, put=False):
    ui = repo.ui

    if not remote:
        lfpullsource = getattr(repo, 'lfpullsource', None)
        if lfpullsource:
            path = ui.expandpath(lfpullsource)
        else:
            path = ui.expandpath('default-push', 'default')

        # ui.expandpath() leaves 'default-push' and 'default' alone if
        # they cannot be expanded: fallback to the empty string,
        # meaning the current directory.
        if path == 'default-push' or path == 'default':
            path = ''
            remote = repo
        else:
            path, _branches = hg.parseurl(path)
            remote = hg.peer(repo, {}, path)

    # The path could be a scheme so use Mercurial's normal functionality
    # to resolve the scheme to a repository and use its path
    path = util.safehasattr(remote, 'url') and remote.url() or remote.path

    match = _scheme_re.match(path)
    if not match:                       # regular filesystem path
        scheme = 'file'
    else:
        scheme = match.group(1)

    try:
        storeproviders = _storeprovider[scheme]
    except KeyError:
        raise util.Abort(_('unsupported URL scheme %r') % scheme)

    for classobj in storeproviders:
        try:
            return classobj(ui, repo, remote)
        except lfutil.storeprotonotcapable:
            pass

    raise util.Abort(_('%s does not appear to be a largefile store') %
                     util.hidepassword(path))