hgext/largefiles/lfcommands.py
changeset 15168 cfccd3bee7b3
child 15170 c1a4a3220711
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/lfcommands.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,483 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''High-level command functions: lfadd() et al., plus the cmdtable.'''
+
+import os
+import shutil
+
+from mercurial import util, match as match_, hg, node, context, error
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+# -- Commands ----------------------------------------------------------
+
+def lfconvert(ui, src, dest, *pats, **opts):
+    '''convert a normal repository to a largefiles repository
+
+    Convert the source repository, creating an identical repository except
+    that all files matching the given patterns or exceeding the given size
+    are added as largefiles. The size used to decide whether to track a file
+    as a largefile is the size of its first version. After running this
+    command you will need to make sure that largefiles is enabled anywhere
+    you intend to push the new repository.'''
+
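+    # Rough usage sketch (SOURCE and DEST are placeholders), matching the
+    # options declared in cmdtable at the bottom of this file:
+    #   hg lfconvert --size 10 SOURCE DEST    # files >= 10 MB -> largefiles
+    #   hg lfconvert --tonormal SOURCE DEST   # back to a normal repository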
+    if opts['tonormal']:
+        tolfile = False
+    else:
+        tolfile = True
+        size = opts['size']
+        if not size:
+            size = ui.config(lfutil.longname, 'size', default=None)
+            try:
+                size = int(size)
+            except ValueError:
+                raise util.Abort(_('largefiles.size must be an integer, '
+                    'was %s') % size)
+            except TypeError:
+                raise util.Abort(_('size must be specified'))
+
+    try:
+        rsrc = hg.repository(ui, src)
+        if not rsrc.local():
+            raise util.Abort(_('%s is not a local Mercurial repo') % src)
+    except error.RepoError, err:
+        ui.traceback()
+        raise util.Abort(err.args[0])
+    if os.path.exists(dest):
+        if not os.path.isdir(dest):
+            raise util.Abort(_('destination %s already exists') % dest)
+        elif os.listdir(dest):
+            raise util.Abort(_('destination %s is not empty') % dest)
+    try:
+        ui.status(_('initializing destination %s\n') % dest)
+        rdst = hg.repository(ui, dest, create=True)
+        if not rdst.local():
+            raise util.Abort(_('%s is not a local Mercurial repo') % dest)
+    except error.RepoError:
+        ui.traceback()
+        raise util.Abort(_('%s is not a repo') % dest)
+
+    try:
+        # Lock the destination to prevent modification while we convert into
+        # it.  There is no need to lock src: we only read from its history,
+        # which cannot change.
+        dst_lock = rdst.lock()
+
+        # Get a list of all changesets in the source.  The easy way to do this
+        # is to simply walk the changelog, using changelog.nodesbetween().
+        # Take a look at mercurial/revlog.py:639 for more details.
+        # Use a generator instead of a list to decrease memory usage
+        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
+            rsrc.heads())[0])
+        revmap = {node.nullid: node.nullid}
+        if tolfile:
+            lfiles = set()
+            normalfiles = set()
+            if not pats:
+                pats = ui.config(lfutil.longname, 'patterns', default=())
+                if pats:
+                    pats = pats.split(' ')
+            if pats:
+                matcher = match_.match(rsrc.root, '', list(pats))
+            else:
+                matcher = None
+
+            lfiletohash = {}
+            for ctx in ctxs:
+                ui.progress(_('converting revisions'), ctx.rev(),
+                    unit=_('revision'), total=rsrc['tip'].rev())
+                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
+                    lfiles, normalfiles, matcher, size, lfiletohash)
+            ui.progress(_('converting revisions'), None)
+
+            if os.path.exists(rdst.wjoin(lfutil.shortname)):
+                shutil.rmtree(rdst.wjoin(lfutil.shortname))
+
+            for f in lfiletohash.keys():
+                if os.path.isfile(rdst.wjoin(f)):
+                    os.unlink(rdst.wjoin(f))
+                try:
+                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
+                except OSError:
+                    # directory not empty or already removed; nothing to do
+                    pass
+
+        else:
+            for ctx in ctxs:
+                ui.progress(_('converting revisions'), ctx.rev(),
+                    unit=_('revision'), total=rsrc['tip'].rev())
+                _addchangeset(ui, rsrc, rdst, ctx, revmap)
+
+            ui.progress(_('converting revisions'), None)
+    except:
+        # we failed, remove the new directory
+        shutil.rmtree(rdst.root)
+        raise
+    finally:
+        dst_lock.release()
+
+def _addchangeset(ui, rsrc, rdst, ctx, revmap):
+    # Convert src parents to dst parents
+    parents = []
+    for p in ctx.parents():
+        parents.append(revmap[p.node()])
+    while len(parents) < 2:
+        parents.append(node.nullid)
+
+    # Generate list of changed files
+    files = set(ctx.files())
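+    # For merge changesets (both parents non-null), supplement ctx.files():
+    # include files present in either parent but absent from the merge
+    # result, plus files whose merged content differs from either parent.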
+    if node.nullid not in parents:
+        mc = ctx.manifest()
+        mp1 = ctx.parents()[0].manifest()
+        mp2 = ctx.parents()[1].manifest()
+        files |= (set(mp1) | set(mp2)) - set(mc)
+        for f in mc:
+            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                files.add(f)
+
+    def getfilectx(repo, memctx, f):
+        if lfutil.standin(f) in files:
+            # if the file isn't in the manifest then it was removed
+            # or renamed, raise IOError to indicate this
+            try:
+                fctx = ctx.filectx(lfutil.standin(f))
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                renamed = lfutil.splitstandin(renamed[0])
+
+            hash = fctx.data().strip()
+            path = lfutil.findfile(rsrc, hash)
+            ### TODO: What if the file is not cached?
+            data = ''
+            fd = None
+            try:
+                fd = open(path, 'rb')
+                data = fd.read()
+            finally:
+                if fd:
+                    fd.close()
+            return context.memfilectx(f, data, 'l' in fctx.flags(),
+                                      'x' in fctx.flags(), renamed)
+        else:
+            try:
+                fctx = ctx.filectx(f)
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                renamed = renamed[0]
+            data = fctx.data()
+            if f == '.hgtags':
+                newdata = []
+                for line in data.splitlines():
+                    id, name = line.split(' ', 1)
+                    newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]),
+                        name))
+                data = ''.join(newdata)
+            return context.memfilectx(f, data, 'l' in fctx.flags(),
+                                      'x' in fctx.flags(), renamed)
+
+    dstfiles = []
+    for file in files:
+        if lfutil.isstandin(file):
+            dstfiles.append(lfutil.splitstandin(file))
+        else:
+            dstfiles.append(file)
+    # Commit
+    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
+    ret = rdst.commitctx(mctx)
+    rdst.dirstate.setparents(ret)
+    revmap[ctx.node()] = rdst.changelog.tip()
+
+def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
+        matcher, size, lfiletohash):
+    # Convert src parents to dst parents
+    parents = []
+    for p in ctx.parents():
+        parents.append(revmap[p.node()])
+    while len(parents) < 2:
+        parents.append(node.nullid)
+
+    # Generate list of changed files
+    files = set(ctx.files())
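+    # Merge handling below mirrors _addchangeset above: include files dropped
+    # in the merge or whose content differs from either parent.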
+    if node.nullid not in parents:
+        mc = ctx.manifest()
+        mp1 = ctx.parents()[0].manifest()
+        mp2 = ctx.parents()[1].manifest()
+        files |= (set(mp1) | set(mp2)) - set(mc)
+        for f in mc:
+            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                files.add(f)
+
+    dstfiles = []
+    for f in files:
+        if f not in lfiles and f not in normalfiles:
+            islfile = _islfile(f, ctx, matcher, size)
+            # If this file was renamed or copied then copy
+            # the lfileness of its predecessor
+            if f in ctx.manifest():
+                fctx = ctx.filectx(f)
+                renamed = fctx.renamed()
+                renamedlfile = renamed and renamed[0] in lfiles
+                islfile |= renamedlfile
+                if 'l' in fctx.flags():
+                    if renamedlfile:
+                        raise util.Abort(
+                            _('renamed/copied largefile %s becomes symlink')
+                            % f)
+                    islfile = False
+            if islfile:
+                lfiles.add(f)
+            else:
+                normalfiles.add(f)
+
+        if f in lfiles:
+            dstfiles.append(lfutil.standin(f))
+            # lfile in manifest if it has not been removed/renamed
+            if f in ctx.manifest():
+                fctx = ctx.filectx(f)
+                if 'l' in fctx.flags():
+                    # recompute renamed: the block above may have been skipped
+                    renamed = fctx.renamed()
+                    if renamed and renamed[0] in lfiles:
+                        raise util.Abort(
+                            _('largefile %s becomes symlink') % f)
+
+                # lfile was modified, update standins
+                fullpath = rdst.wjoin(f)
+                lfutil.createdir(os.path.dirname(fullpath))
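+                # The hex SHA-1 of the contents identifies the largefile; it
+                # is written into the standin and is used to look the data up
+                # later (see lfutil.findfile and the stores).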
+                m = util.sha1('')
+                m.update(ctx[f].data())
+                hash = m.hexdigest()
+                if f not in lfiletohash or lfiletohash[f] != hash:
+                    fd = None
+                    try:
+                        fd = open(fullpath, 'wb')
+                        fd.write(ctx[f].data())
+                    finally:
+                        if fd:
+                            fd.close()
+                    executable = 'x' in ctx[f].flags()
+                    os.chmod(fullpath, lfutil.getmode(executable))
+                    lfutil.writestandin(rdst, lfutil.standin(f), hash,
+                        executable)
+                    lfiletohash[f] = hash
+        else:
+            # normal file
+            dstfiles.append(f)
+
+    def getfilectx(repo, memctx, f):
+        if lfutil.isstandin(f):
+            # if the file isn't in the manifest then it was removed
+            # or renamed, raise IOError to indicate this
+            srcfname = lfutil.splitstandin(f)
+            try:
+                fctx = ctx.filectx(srcfname)
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                # standin is always a lfile because lfileness
+                # doesn't change after rename or copy
+                renamed = lfutil.standin(renamed[0])
+
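+            # only the hash is committed as the standin's content; the
+            # largefile data itself never enters the converted changelog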
+            return context.memfilectx(f, lfiletohash[srcfname], 'l' in
+                fctx.flags(), 'x' in fctx.flags(), renamed)
+        else:
+            try:
+                fctx = ctx.filectx(f)
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                renamed = renamed[0]
+
+            data = fctx.data()
+            if f == '.hgtags':
+                newdata = []
+                for line in data.splitlines():
+                    id, name = line.split(' ', 1)
+                    newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]),
+                        name))
+                data = ''.join(newdata)
+            return context.memfilectx(f, data, 'l' in fctx.flags(),
+                                      'x' in fctx.flags(), renamed)
+
+    # Commit
+    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
+    ret = rdst.commitctx(mctx)
+    rdst.dirstate.setparents(ret)
+    revmap[ctx.node()] = rdst.changelog.tip()
+
+def _islfile(file, ctx, matcher, size):
+    '''Return True if file should be tracked as a largefile: it matches a
+    pattern or is over the given size (in megabytes).'''
+    # never store .hgtags, .hgignore or .hgsigs as largefiles
+    if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
+        return False
+    if matcher and matcher(file):
+        return True
+    try:
+        return ctx.filectx(file).size() >= size * 1024 * 1024
+    except error.LookupError:
+        return False
+
+def uploadlfiles(ui, rsrc, rdst, files):
+    '''upload largefiles to the central store'''
+
+    # Don't upload locally. All largefiles are in the system wide cache
+    # so the other repo can just get them from there.
+    if not files or rdst.local():
+        return
+
+    store = basestore._openstore(rsrc, rdst, put=True)
+
+    at = 0
+    files = filter(lambda h: not store.exists(h), files)
+    for hash in files:
+        ui.progress(_('uploading largefiles'), at, unit='largefile',
+            total=len(files))
+        source = lfutil.findfile(rsrc, hash)
+        if not source:
+            raise util.Abort(
+                _('missing largefile %s needs to be uploaded') % hash)
+        # XXX check for errors here
+        store.put(source, hash)
+        at += 1
+    ui.progress(_('uploading largefiles'), None)
+
+def verifylfiles(ui, repo, all=False, contents=False):
+    '''Verify that every big file revision in the current changeset
+    exists in the central store.  With --contents, also verify that
+    the contents of each big file revision are correct (SHA-1 hash
+    matches the revision ID).  With --all, check every changeset in
+    this repository.'''
+    if all:
+        # Pass a list to the function rather than an iterator because we know a
+        # list will work.
+        revs = range(len(repo))
+    else:
+        revs = ['.']
+
+    store = basestore._openstore(repo)
+    return store.verify(revs, contents=contents)
+
+def cachelfiles(ui, repo, node):
+    '''cachelfiles ensures that all largefiles needed by the specified revision
+    are present in the repository's largefile cache.
+
+    returns a tuple (cached, missing).  cached is the list of files downloaded
+    by this operation; missing is the list of files that were needed but could
+    not be found.'''
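+    # typical call (see updatelfiles below): cachelfiles(ui, repo, '.')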
+    lfiles = lfutil.listlfiles(repo, node)
+    toget = []
+
+    for lfile in lfiles:
+        expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
+        # if it exists and its hash matches, it might have been locally
+        # modified before updating and the user chose 'local'.  in this case,
+        # it will not be in any store, so don't look for it.
+        if ((not os.path.exists(repo.wjoin(lfile))
+                or expectedhash != lfutil.hashfile(repo.wjoin(lfile)))
+                and not lfutil.findfile(repo, expectedhash)):
+            toget.append((lfile, expectedhash))
+
+    if toget:
+        store = basestore._openstore(repo)
+        ret = store.get(toget)
+        return ret
+
+    return ([], [])
+
+def updatelfiles(ui, repo, filelist=None, printmessage=True):
+    wlock = repo.wlock()
+    try:
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
+
+        if filelist is not None:
+            lfiles = [f for f in lfiles if f in filelist]
+
+        printed = False
+        if printmessage and lfiles:
+            ui.status(_('getting changed largefiles\n'))
+            printed = True
+            cachelfiles(ui, repo, '.')
+
+        updated, removed = 0, 0
+        for i in map(lambda f: _updatelfile(repo, lfdirstate, f), lfiles):
+            # increment the appropriate counter according to _updatelfile's
+            # return value
+            updated += i > 0 and i or 0
+            removed -= i < 0 and i or 0
+            if printmessage and (removed or updated) and not printed:
+                ui.status(_('getting changed largefiles\n'))
+                printed = True
+
+        lfdirstate.write()
+        if printed and printmessage:
+            ui.status(_('%d largefiles updated, %d removed\n') % (updated,
+                removed))
+    finally:
+        wlock.release()
+
+def _updatelfile(repo, lfdirstate, lfile):
+    '''updates a single largefile and copies the state of its standin from
+    the repository's dirstate to its state in the lfdirstate.
+
+    returns 1 if the file was modified, -1 if the file was removed, 0 if the
+    file was unchanged, and None if the needed largefile was missing from the
+    cache.'''
+    ret = 0
+    abslfile = repo.wjoin(lfile)
+    absstandin = repo.wjoin(lfutil.standin(lfile))
+    if os.path.exists(absstandin):
+        if os.path.exists(absstandin+'.orig'):
+            shutil.copyfile(abslfile, abslfile+'.orig')
+        expecthash = lfutil.readstandin(repo, lfile)
+        if expecthash != '' and (not os.path.exists(abslfile) or
+                expecthash != lfutil.hashfile(abslfile)):
+            if not lfutil.copyfromcache(repo, expecthash, lfile):
+                return None # don't try to set the mode or update the dirstate
+            ret = 1
+        mode = os.stat(absstandin).st_mode
+        if mode != os.stat(abslfile).st_mode:
+            os.chmod(abslfile, mode)
+            ret = 1
+    else:
+        if os.path.exists(abslfile):
+            os.unlink(abslfile)
+            ret = -1
+    state = repo.dirstate[lfutil.standin(lfile)]
+    if state == 'n':
+        lfdirstate.normal(lfile)
+    elif state == 'r':
+        lfdirstate.remove(lfile)
+    elif state == 'a':
+        lfdirstate.add(lfile)
+    elif state == '?':
+        try:
+            # Mercurial >= 1.9
+            lfdirstate.drop(lfile)
+        except AttributeError:
+            # Mercurial <= 1.8
+            lfdirstate.forget(lfile)
+    return ret
+
+# -- hg commands declarations ------------------------------------------------
+
+
+cmdtable = {
+    'lfconvert': (lfconvert,
+                  [('s', 'size', 0,
+                      _('all files over this size (in megabytes) will be '
+                        'considered largefiles; this can also be specified '
+                        'in your hgrc as [largefiles].size')),
+                   ('', 'tonormal', False,
+                      _('convert from a largefiles repo to a normal repo'))],
+                  _('hg lfconvert SOURCE DEST [FILE ...]')),
+    }