hgext/largefiles/reposetup.py
author Matt Harbison <matt_harbison@yahoo.com>
Sun, 18 Jan 2015 15:15:40 -0500
branchstable
changeset 23923 ab6fd3205dad
parent 23660 1ec6dbb9b216
child 23958 df463ca0adef
permissions -rw-r--r--
largefiles: fix commit of a directory with no largefile changes (issue4330) When a directory is named in the commit file list, the previous behavior was to walk the list, and if no normal files in the directory were also named, add the corresponding standin for each largefile in that directory. The directory is then dropped from the list, so that committing a directory with no normal file changes works. It then added the corresponding standin directory for the first largefile seen, by prefixing it with '.hglf/'. The latter is unnecessary since each affected largefile is explicitly referenced by its standin in the list. It also caused an abort if there were no changed largefiles in the directory, because none of its standins changed: abort: .hglf/foo/bar: no match under directory! This list of files is used to tweak a matcher in lfutil.updatestandinsbymatch(), which is what is passed to commit(). The status() call that is ultimately done in the commit code with this matcher seems to have some OS specific differences. It is not necessary to append '.' for Windows to run the largefiles tests cleanly. But if '.' is not added to the list, the match function isn't called on Linux, so status() would miss any normal files that were also in a named directory. The commit then proceeds without those normal files, or says "nothing changed" if there were no changed largefiles in the directory. This is not filesystem specific, as VFAT on Linux had the same behavior as when run on ext4. It is also not an issue with lfilesrepo.status(), since that only calls the overridden implementation when paths are passed to commit. I dont have access to an OS X machine ATM to test there. Maybe there's a better way to do this. But since the standin directory for the first largefile was previously being added, and that caused the same walk in status(), there's no preformance change to this. There is no danger of erroneously committing files in '.', because the original match function is called, and if it fails, the lfutil.updatestandinsbymatch() tweaked matcher only indicates a match if the file is in the list of standins- and '.' never is. The added tests confirm this.

# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''setup for largefiles repositories: reposetup'''
import copy
import os

from mercurial import error, manifest, match as match_, util
from mercurial.i18n import _
from mercurial import scmutil

import lfcommands
import lfutil

def reposetup(ui, repo):
    # wire repositories should be given new wireproto functions
    # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
    if not repo.local():
        return

    class lfilesrepo(repo.__class__):
        lfstatus = False
        def status_nolfiles(self, *args, **kwargs):
            return super(lfilesrepo, self).status(*args, **kwargs)

        # When lfstatus is set, return a context that gives the names
        # of largefiles instead of their corresponding standins and
        # identifies the largefiles as always binary, regardless of
        # their actual contents.
        def __getitem__(self, changeid):
            ctx = super(lfilesrepo, self).__getitem__(changeid)
            if self.unfiltered().lfstatus:
                class lfilesmanifestdict(manifest.manifestdict):
                    def __contains__(self, filename):
                        orig = super(lfilesmanifestdict, self).__contains__
                        return orig(filename) or orig(lfutil.standin(filename))
                class lfilesctx(ctx.__class__):
                    def files(self):
                        filenames = super(lfilesctx, self).files()
                        return [lfutil.splitstandin(f) or f for f in filenames]
                    def manifest(self):
                        man1 = super(lfilesctx, self).manifest()
                        man1.__class__ = lfilesmanifestdict
                        return man1
                    def filectx(self, path, fileid=None, filelog=None):
                        orig = super(lfilesctx, self).filectx
                        try:
                            if filelog is not None:
                                result = orig(path, fileid, filelog)
                            else:
                                result = orig(path, fileid)
                        except error.LookupError:
                            # Adding a null character will cause Mercurial to
                            # identify this as a binary file.
                            if filelog is not None:
                                result = orig(lfutil.standin(path), fileid,
                                              filelog)
                            else:
                                result = orig(lfutil.standin(path), fileid)
                            olddata = result.data
                            result.data = lambda: olddata() + '\0'
                        return result
                ctx.__class__ = lfilesctx
            return ctx

        # Figure out the status of big files and insert them into the
        # appropriate list in the result. Also removes standin files
        # from the listing. Revert to the original status if
        # self.lfstatus is False.
        def status(self, node1='.', node2=None, match=None, ignored=False,
                clean=False, unknown=False, listsubrepos=False):
            listignored, listclean, listunknown = ignored, clean, unknown
            orig = super(lfilesrepo, self).status

            # When various overrides set repo.lfstatus, the change is redirected
            # to the unfiltered repo, and self.lfstatus is always false when
            # this repo is filtered.
            if not self.unfiltered().lfstatus:
                return orig(node1, node2, match, listignored, listclean,
                            listunknown, listsubrepos)

            # some calls in this function rely on the old version of status
            self.unfiltered().lfstatus = False
            ctx1 = self[node1]
            ctx2 = self[node2]
            working = ctx2.rev() is None
            parentworking = working and ctx1 == self['.']

            if match is None:
                match = match_.always(self.root, self.getcwd())

            wlock = None
            try:
                try:
                    # updating the dirstate is optional
                    # so we don't wait on the lock
                    wlock = self.wlock(False)
                except error.LockError:
                    pass

                # First check if paths or patterns were specified on the
                # command line.  If there were, and they don't match any
                # largefiles, we should just bail here and let super
                # handle it -- thus gaining a big performance boost.
                lfdirstate = lfutil.openlfdirstate(ui, self)
                if not match.always():
                    for f in lfdirstate:
                        if match(f):
                            break
                    else:
                        return orig(node1, node2, match, listignored, listclean,
                                    listunknown, listsubrepos)

                # Create a copy of match that matches standins instead
                # of largefiles.
                def tostandins(files):
                    if not working:
                        return files
                    newfiles = []
                    dirstate = self.dirstate
                    for f in files:
                        sf = lfutil.standin(f)
                        if sf in dirstate:
                            newfiles.append(sf)
                        elif sf in dirstate.dirs():
                            # Directory entries could be regular or
                            # standin, check both
                            newfiles.extend((f, sf))
                        else:
                            newfiles.append(f)
                    return newfiles

                m = copy.copy(match)
                m._files = tostandins(m._files)

                result = orig(node1, node2, m, ignored, clean, unknown,
                              listsubrepos)
                if working:

                    def sfindirstate(f):
                        sf = lfutil.standin(f)
                        dirstate = self.dirstate
                        return sf in dirstate or sf in dirstate.dirs()

                    match._files = [f for f in match._files
                                    if sfindirstate(f)]
                    # Don't waste time getting the ignored and unknown
                    # files from lfdirstate
                    unsure, s = lfdirstate.status(match, [], False, listclean,
                                                  False)
                    (modified, added, removed, clean) = (s.modified, s.added,
                                                         s.removed, s.clean)
                    if parentworking:
                        for lfile in unsure:
                            standin = lfutil.standin(lfile)
                            if standin not in ctx1:
                                # from second parent
                                modified.append(lfile)
                            elif ctx1[standin].data().strip() \
                                    != lfutil.hashfile(self.wjoin(lfile)):
                                modified.append(lfile)
                            else:
                                if listclean:
                                    clean.append(lfile)
                                lfdirstate.normal(lfile)
                    else:
                        tocheck = unsure + modified + added + clean
                        modified, added, clean = [], [], []
                        checkexec = self.dirstate._checkexec

                        for lfile in tocheck:
                            standin = lfutil.standin(lfile)
                            if standin in ctx1:
                                abslfile = self.wjoin(lfile)
                                if ((ctx1[standin].data().strip() !=
                                     lfutil.hashfile(abslfile)) or
                                    (checkexec and
                                     ('x' in ctx1.flags(standin)) !=
                                     bool(lfutil.getexecutable(abslfile)))):
                                    modified.append(lfile)
                                elif listclean:
                                    clean.append(lfile)
                            else:
                                added.append(lfile)

                        # at this point, 'removed' contains largefiles
                        # marked as 'R' in the working context.
                        # then, largefiles not managed also in the target
                        # context should be excluded from 'removed'.
                        removed = [lfile for lfile in removed
                                   if lfutil.standin(lfile) in ctx1]

                    # Standins no longer found in lfdirstate has been
                    # removed
                    for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
                        lfile = lfutil.splitstandin(standin)
                        if not match(lfile):
                            continue
                        if lfile not in lfdirstate:
                            removed.append(lfile)

                    # Filter result lists
                    result = list(result)

                    # Largefiles are not really removed when they're
                    # still in the normal dirstate. Likewise, normal
                    # files are not really removed if they are still in
                    # lfdirstate. This happens in merges where files
                    # change type.
                    removed = [f for f in removed
                               if f not in self.dirstate]
                    result[2] = [f for f in result[2]
                                 if f not in lfdirstate]

                    lfiles = set(lfdirstate._map)
                    # Unknown files
                    result[4] = set(result[4]).difference(lfiles)
                    # Ignored files
                    result[5] = set(result[5]).difference(lfiles)
                    # combine normal files and largefiles
                    normals = [[fn for fn in filelist
                                if not lfutil.isstandin(fn)]
                               for filelist in result]
                    lfstatus = (modified, added, removed, s.deleted, [], [],
                                clean)
                    result = [sorted(list1 + list2)
                              for (list1, list2) in zip(normals, lfstatus)]
                else: # not against working directory
                    result = [[lfutil.splitstandin(f) or f for f in items]
                              for items in result]

                if wlock:
                    lfdirstate.write()

            finally:
                if wlock:
                    wlock.release()

            self.unfiltered().lfstatus = True
            return scmutil.status(*result)

        def commitctx(self, ctx, *args, **kwargs):
            node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
            class lfilesctx(ctx.__class__):
                def markcommitted(self, node):
                    orig = super(lfilesctx, self).markcommitted
                    return lfutil.markcommitted(orig, self, node)
            ctx.__class__ = lfilesctx
            return node

        # Before commit, largefile standins have not had their
        # contents updated to reflect the hash of their largefile.
        # Do that here.
        def commit(self, text="", user=None, date=None, match=None,
                force=False, editor=False, extra={}):
            orig = super(lfilesrepo, self).commit

            wlock = self.wlock()
            try:
                lfcommithook = self._lfcommithooks[-1]
                match = lfcommithook(self, match)
                result = orig(text=text, user=user, date=date, match=match,
                                force=force, editor=editor, extra=extra)
                return result
            finally:
                wlock.release()

        def push(self, remote, force=False, revs=None, newbranch=False):
            if remote.local():
                missing = set(self.requirements) - remote.local().supported
                if missing:
                    msg = _("required features are not"
                            " supported in the destination:"
                            " %s") % (', '.join(sorted(missing)))
                    raise util.Abort(msg)
            return super(lfilesrepo, self).push(remote, force=force, revs=revs,
                newbranch=newbranch)

        # TODO: _subdirlfs should be moved into "lfutil.py", because
        # it is referred only from "lfutil.updatestandinsbymatch"
        def _subdirlfs(self, files, lfiles):
            '''
            Adjust matched file list
            If we pass a directory to commit whose only committable files
            are largefiles, the core commit code aborts before finding
            the largefiles.
            So we do the following:
            For directories that only have largefiles as matches,
            we explicitly add the largefiles to the match list and remove
            the directory.
            In other cases, we leave the match list unmodified.
            '''
            actualfiles = []
            dirs = []
            regulars = []

            for f in files:
                if lfutil.isstandin(f + '/'):
                    raise util.Abort(
                        _('file "%s" is a largefile standin') % f,
                        hint=('commit the largefile itself instead'))
                # Scan directories
                if os.path.isdir(self.wjoin(f)):
                    dirs.append(f)
                else:
                    regulars.append(f)

            for f in dirs:
                matcheddir = False
                d = self.dirstate.normalize(f) + '/'
                # Check for matched normal files
                for mf in regulars:
                    if self.dirstate.normalize(mf).startswith(d):
                        actualfiles.append(f)
                        matcheddir = True
                        break
                if not matcheddir:
                    # If no normal match, manually append
                    # any matching largefiles
                    for lf in lfiles:
                        if self.dirstate.normalize(lf).startswith(d):
                            actualfiles.append(lf)
                            if not matcheddir:
                                # There may still be normal files in the dir, so
                                # make sure a directory is in the list, which
                                # forces status to walk and call the match
                                # function on the matcher.  Windows does NOT
                                # require this.
                                actualfiles.append('.')
                                matcheddir = True
                # Nothing in dir, so readd it
                # and let commit reject it
                if not matcheddir:
                    actualfiles.append(f)

            # Always add normal files
            actualfiles += regulars
            return actualfiles

    repo.__class__ = lfilesrepo

    # stack of hooks being executed before committing.
    # only last element ("_lfcommithooks[-1]") is used for each committing.
    repo._lfcommithooks = [lfutil.updatestandinsbymatch]

    # Stack of status writer functions taking "*msg, **opts" arguments
    # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
    # is used to write status out.
    repo._lfstatuswriters = [ui.status]

    def prepushoutgoinghook(local, remote, outgoing):
        if outgoing.missing:
            toupload = set()
            addfunc = lambda fn, lfhash: toupload.add(lfhash)
            lfutil.getlfilestoupload(local, outgoing.missing, addfunc)
            lfcommands.uploadlfiles(ui, local, remote, toupload)
    repo.prepushoutgoinghooks.add("largefiles", prepushoutgoinghook)

    def checkrequireslfiles(ui, repo, **kwargs):
        if 'largefiles' not in repo.requirements and util.any(
                lfutil.shortname+'/' in f[0] for f in repo.store.datafiles()):
            repo.requirements.add('largefiles')
            repo._writerequirements()

    ui.setconfig('hooks', 'changegroup.lfiles', checkrequireslfiles,
                 'largefiles')
    ui.setconfig('hooks', 'commit.lfiles', checkrequireslfiles, 'largefiles')