mercurial/sparse.py
author Kevin Bullock <kbullock+mercurial@ringworld.org>
Tue, 06 Mar 2018 13:17:07 -0600
branchstable
changeset 36757 8bba684efde7
parent 35742 7a1806e0daea
child 37576 6ef94f24aa82
permissions -rw-r--r--
merge with security patches

# sparse.py - functionality for sparse checkouts
#
# Copyright 2014 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import hashlib
import os

from .i18n import _
from .node import (
    hex,
    nullid,
)
from . import (
    error,
    match as matchmod,
    merge as mergemod,
    pathutil,
    pycompat,
    scmutil,
    util,
)

# Whether sparse features are enabled. This variable is intended to be
# temporary to facilitate porting sparse to core. It should eventually be
# a per-repo option, possibly a repo requirement.
enabled = False

def parseconfig(ui, raw):
    """Parse sparse config file content.

    Returns a tuple of includes, excludes, and profiles.
    """
    includes = set()
    excludes = set()
    profiles = set()
    current = None
    havesection = False

    for line in raw.split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            # empty or comment line, skip
            continue
        elif line.startswith('%include '):
            line = line[9:].strip()
            if line:
                profiles.add(line)
        elif line == '[include]':
            if havesection and current != includes:
                # TODO pass filename into this API so we can report it.
                raise error.Abort(_('sparse config cannot have includes ' +
                                    'after excludes'))
            havesection = True
            current = includes
            continue
        elif line == '[exclude]':
            havesection = True
            current = excludes
        elif line:
            if current is None:
                raise error.Abort(_('sparse config entry outside of '
                                    'section: %s') % line,
                                  hint=_('add an [include] or [exclude] line '
                                         'to declare the entry type'))

            if line.strip().startswith('/'):
                ui.warn(_('warning: sparse profile cannot use' +
                          ' paths starting with /, ignoring %s\n') % line)
                continue
            current.add(line)

    return includes, excludes, profiles

# Exists as separate function to facilitate monkeypatching.
def readprofile(repo, profile, changeid):
    """Resolve the raw content of a sparse profile file."""
    # TODO add some kind of cache here because this incurs a manifest
    # resolve and can be slow.
    return repo.filectx(profile, changeid=changeid).data()

def patternsforrev(repo, rev):
    """Obtain sparse checkout patterns for the given rev.

    Returns a tuple of iterables representing includes, excludes, and
    patterns.
    """
    # Feature isn't enabled. No-op.
    if not enabled:
        return set(), set(), set()

    raw = repo.vfs.tryread('sparse')
    if not raw:
        return set(), set(), set()

    if rev is None:
        raise error.Abort(_('cannot parse sparse patterns from working '
                            'directory'))

    includes, excludes, profiles = parseconfig(repo.ui, raw)
    ctx = repo[rev]

    if profiles:
        visited = set()
        while profiles:
            profile = profiles.pop()
            if profile in visited:
                continue

            visited.add(profile)

            try:
                raw = readprofile(repo, profile, rev)
            except error.ManifestLookupError:
                msg = (
                    "warning: sparse profile '%s' not found "
                    "in rev %s - ignoring it\n" % (profile, ctx))
                # experimental config: sparse.missingwarning
                if repo.ui.configbool(
                        'sparse', 'missingwarning'):
                    repo.ui.warn(msg)
                else:
                    repo.ui.debug(msg)
                continue

            pincludes, pexcludes, subprofs = parseconfig(repo.ui, raw)
            includes.update(pincludes)
            excludes.update(pexcludes)
            profiles.update(subprofs)

        profiles = visited

    if includes:
        includes.add('.hg*')

    return includes, excludes, profiles

def activeconfig(repo):
    """Determine the active sparse config rules.

    Rules are constructed by reading the current sparse config and bringing in
    referenced profiles from parents of the working directory.
    """
    revs = [repo.changelog.rev(node) for node in
            repo.dirstate.parents() if node != nullid]

    allincludes = set()
    allexcludes = set()
    allprofiles = set()

    for rev in revs:
        includes, excludes, profiles = patternsforrev(repo, rev)
        allincludes |= includes
        allexcludes |= excludes
        allprofiles |= profiles

    return allincludes, allexcludes, allprofiles

def configsignature(repo, includetemp=True):
    """Obtain the signature string for the current sparse configuration.

    This is used to construct a cache key for matchers.
    """
    cache = repo._sparsesignaturecache

    signature = cache.get('signature')

    if includetemp:
        tempsignature = cache.get('tempsignature')
    else:
        tempsignature = '0'

    if signature is None or (includetemp and tempsignature is None):
        signature = hex(hashlib.sha1(repo.vfs.tryread('sparse')).digest())
        cache['signature'] = signature

        if includetemp:
            raw = repo.vfs.tryread('tempsparse')
            tempsignature = hex(hashlib.sha1(raw).digest())
            cache['tempsignature'] = tempsignature

    return '%s %s' % (signature, tempsignature)

def writeconfig(repo, includes, excludes, profiles):
    """Write the sparse config file given a sparse configuration."""
    with repo.vfs('sparse', 'wb') as fh:
        for p in sorted(profiles):
            fh.write('%%include %s\n' % p)

        if includes:
            fh.write('[include]\n')
            for i in sorted(includes):
                fh.write(i)
                fh.write('\n')

        if excludes:
            fh.write('[exclude]\n')
            for e in sorted(excludes):
                fh.write(e)
                fh.write('\n')

    repo._sparsesignaturecache.clear()

def readtemporaryincludes(repo):
    raw = repo.vfs.tryread('tempsparse')
    if not raw:
        return set()

    return set(raw.split('\n'))

def writetemporaryincludes(repo, includes):
    repo.vfs.write('tempsparse', '\n'.join(sorted(includes)))
    repo._sparsesignaturecache.clear()

def addtemporaryincludes(repo, additional):
    includes = readtemporaryincludes(repo)
    for i in additional:
        includes.add(i)
    writetemporaryincludes(repo, includes)

def prunetemporaryincludes(repo):
    if not enabled or not repo.vfs.exists('tempsparse'):
        return

    s = repo.status()
    if s.modified or s.added or s.removed or s.deleted:
        # Still have pending changes. Don't bother trying to prune.
        return

    sparsematch = matcher(repo, includetemp=False)
    dirstate = repo.dirstate
    actions = []
    dropped = []
    tempincludes = readtemporaryincludes(repo)
    for file in tempincludes:
        if file in dirstate and not sparsematch(file):
            message = _('dropping temporarily included sparse files')
            actions.append((file, None, message))
            dropped.append(file)

    typeactions = collections.defaultdict(list)
    typeactions['r'] = actions
    mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False)

    # Fix dirstate
    for file in dropped:
        dirstate.drop(file)

    repo.vfs.unlink('tempsparse')
    repo._sparsesignaturecache.clear()
    msg = _('cleaned up %d temporarily added file(s) from the '
            'sparse checkout\n')
    repo.ui.status(msg % len(tempincludes))

def forceincludematcher(matcher, includes):
    """Returns a matcher that returns true for any of the forced includes
    before testing against the actual matcher."""
    kindpats = [('path', include, '') for include in includes]
    includematcher = matchmod.includematcher('', '', kindpats)
    return matchmod.unionmatcher([includematcher, matcher])

def matcher(repo, revs=None, includetemp=True):
    """Obtain a matcher for sparse working directories for the given revs.

    If multiple revisions are specified, the matcher is the union of all
    revs.

    ``includetemp`` indicates whether to use the temporary sparse profile.
    """
    # If sparse isn't enabled, sparse matcher matches everything.
    if not enabled:
        return matchmod.always(repo.root, '')

    if not revs or revs == [None]:
        revs = [repo.changelog.rev(node)
                for node in repo.dirstate.parents() if node != nullid]

    signature = configsignature(repo, includetemp=includetemp)

    key = '%s %s' % (signature, ' '.join(map(pycompat.bytestr, revs)))

    result = repo._sparsematchercache.get(key)
    if result:
        return result

    matchers = []
    for rev in revs:
        try:
            includes, excludes, profiles = patternsforrev(repo, rev)

            if includes or excludes:
                matcher = matchmod.match(repo.root, '', [],
                                         include=includes, exclude=excludes,
                                         default='relpath')
                matchers.append(matcher)
        except IOError:
            pass

    if not matchers:
        result = matchmod.always(repo.root, '')
    elif len(matchers) == 1:
        result = matchers[0]
    else:
        result = matchmod.unionmatcher(matchers)

    if includetemp:
        tempincludes = readtemporaryincludes(repo)
        result = forceincludematcher(result, tempincludes)

    repo._sparsematchercache[key] = result

    return result

def filterupdatesactions(repo, wctx, mctx, branchmerge, actions):
    """Filter updates to only lay out files that match the sparse rules."""
    if not enabled:
        return actions

    oldrevs = [pctx.rev() for pctx in wctx.parents()]
    oldsparsematch = matcher(repo, oldrevs)

    if oldsparsematch.always():
        return actions

    files = set()
    prunedactions = {}

    if branchmerge:
        # If we're merging, use the wctx filter, since we're merging into
        # the wctx.
        sparsematch = matcher(repo, [wctx.parents()[0].rev()])
    else:
        # If we're updating, use the target context's filter, since we're
        # moving to the target context.
        sparsematch = matcher(repo, [mctx.rev()])

    temporaryfiles = []
    for file, action in actions.iteritems():
        type, args, msg = action
        files.add(file)
        if sparsematch(file):
            prunedactions[file] = action
        elif type == 'm':
            temporaryfiles.append(file)
            prunedactions[file] = action
        elif branchmerge:
            if type != 'k':
                temporaryfiles.append(file)
                prunedactions[file] = action
        elif type == 'f':
            prunedactions[file] = action
        elif file in wctx:
            prunedactions[file] = ('r', args, msg)

    if len(temporaryfiles) > 0:
        repo.ui.status(_('temporarily included %d file(s) in the sparse '
                         'checkout for merging\n') % len(temporaryfiles))
        addtemporaryincludes(repo, temporaryfiles)

        # Add the new files to the working copy so they can be merged, etc
        actions = []
        message = 'temporarily adding to sparse checkout'
        wctxmanifest = repo[None].manifest()
        for file in temporaryfiles:
            if file in wctxmanifest:
                fctx = repo[None][file]
                actions.append((file, (fctx.flags(), False), message))

        typeactions = collections.defaultdict(list)
        typeactions['g'] = actions
        mergemod.applyupdates(repo, typeactions, repo[None], repo['.'],
                              False)

        dirstate = repo.dirstate
        for file, flags, msg in actions:
            dirstate.normal(file)

    profiles = activeconfig(repo)[2]
    changedprofiles = profiles & files
    # If an active profile changed during the update, refresh the checkout.
    # Don't do this during a branch merge, since all incoming changes should
    # have been handled by the temporary includes above.
    if changedprofiles and not branchmerge:
        mf = mctx.manifest()
        for file in mf:
            old = oldsparsematch(file)
            new = sparsematch(file)
            if not old and new:
                flags = mf.flags(file)
                prunedactions[file] = ('g', (flags, False), '')
            elif old and not new:
                prunedactions[file] = ('r', [], '')

    return prunedactions

def refreshwdir(repo, origstatus, origsparsematch, force=False):
    """Refreshes working directory by taking sparse config into account.

    The old status and sparse matcher is compared against the current sparse
    matcher.

    Will abort if a file with pending changes is being excluded or included
    unless ``force`` is True.
    """
    # Verify there are no pending changes
    pending = set()
    pending.update(origstatus.modified)
    pending.update(origstatus.added)
    pending.update(origstatus.removed)
    sparsematch = matcher(repo)
    abort = False

    for f in pending:
        if not sparsematch(f):
            repo.ui.warn(_("pending changes to '%s'\n") % f)
            abort = not force

    if abort:
        raise error.Abort(_('could not update sparseness due to pending '
                            'changes'))

    # Calculate actions
    dirstate = repo.dirstate
    ctx = repo['.']
    added = []
    lookup = []
    dropped = []
    mf = ctx.manifest()
    files = set(mf)

    actions = {}

    for file in files:
        old = origsparsematch(file)
        new = sparsematch(file)
        # Add files that are newly included, or that don't exist in
        # the dirstate yet.
        if (new and not old) or (old and new and not file in dirstate):
            fl = mf.flags(file)
            if repo.wvfs.exists(file):
                actions[file] = ('e', (fl,), '')
                lookup.append(file)
            else:
                actions[file] = ('g', (fl, False), '')
                added.append(file)
        # Drop files that are newly excluded, or that still exist in
        # the dirstate.
        elif (old and not new) or (not old and not new and file in dirstate):
            dropped.append(file)
            if file not in pending:
                actions[file] = ('r', [], '')

    # Verify there are no pending changes in newly included files
    abort = False
    for file in lookup:
        repo.ui.warn(_("pending changes to '%s'\n") % file)
        abort = not force
    if abort:
        raise error.Abort(_('cannot change sparseness due to pending '
                            'changes (delete the files or use '
                            '--force to bring them back dirty)'))

    # Check for files that were only in the dirstate.
    for file, state in dirstate.iteritems():
        if not file in files:
            old = origsparsematch(file)
            new = sparsematch(file)
            if old and not new:
                dropped.append(file)

    # Apply changes to disk
    typeactions = dict((m, [])
                       for m in 'a f g am cd dc r dm dg m e k p pr'.split())
    for f, (m, args, msg) in actions.iteritems():
        if m not in typeactions:
            typeactions[m] = []
        typeactions[m].append((f, args, msg))

    mergemod.applyupdates(repo, typeactions, repo[None], repo['.'], False)

    # Fix dirstate
    for file in added:
        dirstate.normal(file)

    for file in dropped:
        dirstate.drop(file)

    for file in lookup:
        # File exists on disk, and we're bringing it back in an unknown state.
        dirstate.normallookup(file)

    return added, dropped, lookup

def aftercommit(repo, node):
    """Perform actions after a working directory commit."""
    # This function is called unconditionally, even if sparse isn't
    # enabled.
    ctx = repo[node]

    profiles = patternsforrev(repo, ctx.rev())[2]

    # profiles will only have data if sparse is enabled.
    if profiles & set(ctx.files()):
        origstatus = repo.status()
        origsparsematch = matcher(repo)
        refreshwdir(repo, origstatus, origsparsematch, force=True)

    prunetemporaryincludes(repo)

def _updateconfigandrefreshwdir(repo, includes, excludes, profiles,
                                force=False, removing=False):
    """Update the sparse config and working directory state."""
    raw = repo.vfs.tryread('sparse')
    oldincludes, oldexcludes, oldprofiles = parseconfig(repo.ui, raw)

    oldstatus = repo.status()
    oldmatch = matcher(repo)
    oldrequires = set(repo.requirements)

    # TODO remove this try..except once the matcher integrates better
    # with dirstate. We currently have to write the updated config
    # because that will invalidate the matcher cache and force a
    # re-read. We ideally want to update the cached matcher on the
    # repo instance then flush the new config to disk once wdir is
    # updated. But this requires massive rework to matcher() and its
    # consumers.

    if 'exp-sparse' in oldrequires and removing:
        repo.requirements.discard('exp-sparse')
        scmutil.writerequires(repo.vfs, repo.requirements)
    elif 'exp-sparse' not in oldrequires:
        repo.requirements.add('exp-sparse')
        scmutil.writerequires(repo.vfs, repo.requirements)

    try:
        writeconfig(repo, includes, excludes, profiles)
        return refreshwdir(repo, oldstatus, oldmatch, force=force)
    except Exception:
        if repo.requirements != oldrequires:
            repo.requirements.clear()
            repo.requirements |= oldrequires
            scmutil.writerequires(repo.vfs, repo.requirements)
        writeconfig(repo, oldincludes, oldexcludes, oldprofiles)
        raise

def clearrules(repo, force=False):
    """Clears include/exclude rules from the sparse config.

    The remaining sparse config only has profiles, if defined. The working
    directory is refreshed, as needed.
    """
    with repo.wlock():
        raw = repo.vfs.tryread('sparse')
        includes, excludes, profiles = parseconfig(repo.ui, raw)

        if not includes and not excludes:
            return

        _updateconfigandrefreshwdir(repo, set(), set(), profiles, force=force)

def importfromfiles(repo, opts, paths, force=False):
    """Import sparse config rules from files.

    The updated sparse config is written out and the working directory
    is refreshed, as needed.
    """
    with repo.wlock():
        # read current configuration
        raw = repo.vfs.tryread('sparse')
        includes, excludes, profiles = parseconfig(repo.ui, raw)
        aincludes, aexcludes, aprofiles = activeconfig(repo)

        # Import rules on top; only take in rules that are not yet
        # part of the active rules.
        changed = False
        for p in paths:
            with util.posixfile(util.expandpath(p)) as fh:
                raw = fh.read()

            iincludes, iexcludes, iprofiles = parseconfig(repo.ui, raw)
            oldsize = len(includes) + len(excludes) + len(profiles)
            includes.update(iincludes - aincludes)
            excludes.update(iexcludes - aexcludes)
            profiles.update(iprofiles - aprofiles)
            if len(includes) + len(excludes) + len(profiles) > oldsize:
                changed = True

        profilecount = includecount = excludecount = 0
        fcounts = (0, 0, 0)

        if changed:
            profilecount = len(profiles - aprofiles)
            includecount = len(includes - aincludes)
            excludecount = len(excludes - aexcludes)

            fcounts = map(len, _updateconfigandrefreshwdir(
                repo, includes, excludes, profiles, force=force))

        printchanges(repo.ui, opts, profilecount, includecount, excludecount,
                     *fcounts)

def updateconfig(repo, pats, opts, include=False, exclude=False, reset=False,
                 delete=False, enableprofile=False, disableprofile=False,
                 force=False, usereporootpaths=False):
    """Perform a sparse config update.

    Only one of the actions may be performed.

    The new config is written out and a working directory refresh is performed.
    """
    with repo.wlock():
        raw = repo.vfs.tryread('sparse')
        oldinclude, oldexclude, oldprofiles = parseconfig(repo.ui, raw)

        if reset:
            newinclude = set()
            newexclude = set()
            newprofiles = set()
        else:
            newinclude = set(oldinclude)
            newexclude = set(oldexclude)
            newprofiles = set(oldprofiles)

        if any(os.path.isabs(pat) for pat in pats):
            raise error.Abort(_('paths cannot be absolute'))

        if not usereporootpaths:
            # let's treat paths as relative to cwd
            root, cwd = repo.root, repo.getcwd()
            abspats = []
            for kindpat in pats:
                kind, pat = matchmod._patsplit(kindpat, None)
                if kind in matchmod.cwdrelativepatternkinds or kind is None:
                    ap = (kind + ':' if kind else '') +\
                            pathutil.canonpath(root, cwd, pat)
                    abspats.append(ap)
                else:
                    abspats.append(kindpat)
            pats = abspats

        if include:
            newinclude.update(pats)
        elif exclude:
            newexclude.update(pats)
        elif enableprofile:
            newprofiles.update(pats)
        elif disableprofile:
            newprofiles.difference_update(pats)
        elif delete:
            newinclude.difference_update(pats)
            newexclude.difference_update(pats)

        profilecount = (len(newprofiles - oldprofiles) -
                        len(oldprofiles - newprofiles))
        includecount = (len(newinclude - oldinclude) -
                        len(oldinclude - newinclude))
        excludecount = (len(newexclude - oldexclude) -
                        len(oldexclude - newexclude))

        fcounts = map(len, _updateconfigandrefreshwdir(
            repo, newinclude, newexclude, newprofiles, force=force,
            removing=reset))

        printchanges(repo.ui, opts, profilecount, includecount,
                     excludecount, *fcounts)

def printchanges(ui, opts, profilecount=0, includecount=0, excludecount=0,
                 added=0, dropped=0, conflicting=0):
    """Print output summarizing sparse config changes."""
    with ui.formatter('sparse', opts) as fm:
        fm.startitem()
        fm.condwrite(ui.verbose, 'profiles_added', _('Profiles changed: %d\n'),
                     profilecount)
        fm.condwrite(ui.verbose, 'include_rules_added',
                     _('Include rules changed: %d\n'), includecount)
        fm.condwrite(ui.verbose, 'exclude_rules_added',
                     _('Exclude rules changed: %d\n'), excludecount)

        # In 'plain' verbose mode, mergemod.applyupdates already outputs what
        # files are added or removed outside of the templating formatter
        # framework. No point in repeating ourselves in that case.
        if not fm.isplain():
            fm.condwrite(ui.verbose, 'files_added', _('Files added: %d\n'),
                         added)
            fm.condwrite(ui.verbose, 'files_dropped', _('Files dropped: %d\n'),
                         dropped)
            fm.condwrite(ui.verbose, 'files_conflicting',
                         _('Files conflicting: %d\n'), conflicting)