diff -r 8df4166b6f63 -r cfccd3bee7b3 hgext/largefiles/lfcommands.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hgext/largefiles/lfcommands.py Sat Sep 24 17:35:45 2011 +0200 @@ -0,0 +1,483 @@ +# Copyright 2009-2010 Gregory P. Ward +# Copyright 2009-2010 Intelerad Medical Systems Incorporated +# Copyright 2010-2011 Fog Creek Software +# Copyright 2010-2011 Unity Technologies +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +'''High-level command functions: lfadd() et. al, plus the cmdtable.''' + +import os +import shutil + +from mercurial import util, match as match_, hg, node, context, error +from mercurial.i18n import _ + +import lfutil +import basestore + +# -- Commands ---------------------------------------------------------- + +def lfconvert(ui, src, dest, *pats, **opts): + '''Convert a normal repository to a largefiles repository + + Convert source repository creating an identical repository, except that all + files that match the patterns given, or are over the given size will be + added as largefiles. The size used to determine whether or not to track a + file as a largefile is the size of the first version of the file. After + running this command you will need to make sure that largefiles is enabled + anywhere you intend to push the new repository.''' + + if opts['tonormal']: + tolfile = False + else: + tolfile = True + size = opts['size'] + if not size: + size = ui.config(lfutil.longname, 'size', default=None) + try: + size = int(size) + except ValueError: + raise util.Abort(_('largefiles.size must be integer, was %s\n') % \ + size) + except TypeError: + raise util.Abort(_('size must be specified')) + + try: + rsrc = hg.repository(ui, src) + if not rsrc.local(): + raise util.Abort(_('%s is not a local Mercurial repo') % src) + except error.RepoError, err: + ui.traceback() + raise util.Abort(err.args[0]) + if os.path.exists(dest): + if not os.path.isdir(dest): + raise util.Abort(_('destination %s already exists') % dest) + elif os.listdir(dest): + raise util.Abort(_('destination %s is not empty') % dest) + try: + ui.status(_('initializing destination %s\n') % dest) + rdst = hg.repository(ui, dest, create=True) + if not rdst.local(): + raise util.Abort(_('%s is not a local Mercurial repo') % dest) + except error.RepoError: + ui.traceback() + raise util.Abort(_('%s is not a repo') % dest) + + try: + # Lock destination to prevent modification while it is converted to. + # Don't need to lock src because we are just reading from its history + # which can't change. + dst_lock = rdst.lock() + + # Get a list of all changesets in the source. The easy way to do this + # is to simply walk the changelog, using changelog.nodesbewteen(). + # Take a look at mercurial/revlog.py:639 for more details. + # Use a generator instead of a list to decrease memory usage + ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None, + rsrc.heads())[0]) + revmap = {node.nullid: node.nullid} + if tolfile: + lfiles = set() + normalfiles = set() + if not pats: + pats = ui.config(lfutil.longname, 'patterns', default=()) + if pats: + pats = pats.split(' ') + if pats: + matcher = match_.match(rsrc.root, '', list(pats)) + else: + matcher = None + + lfiletohash = {} + for ctx in ctxs: + ui.progress(_('converting revisions'), ctx.rev(), + unit=_('revision'), total=rsrc['tip'].rev()) + _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, + lfiles, normalfiles, matcher, size, lfiletohash) + ui.progress(_('converting revisions'), None) + + if os.path.exists(rdst.wjoin(lfutil.shortname)): + shutil.rmtree(rdst.wjoin(lfutil.shortname)) + + for f in lfiletohash.keys(): + if os.path.isfile(rdst.wjoin(f)): + os.unlink(rdst.wjoin(f)) + try: + os.removedirs(os.path.dirname(rdst.wjoin(f))) + except: + pass + + else: + for ctx in ctxs: + ui.progress(_('converting revisions'), ctx.rev(), + unit=_('revision'), total=rsrc['tip'].rev()) + _addchangeset(ui, rsrc, rdst, ctx, revmap) + + ui.progress(_('converting revisions'), None) + except: + # we failed, remove the new directory + shutil.rmtree(rdst.root) + raise + finally: + dst_lock.release() + +def _addchangeset(ui, rsrc, rdst, ctx, revmap): + # Convert src parents to dst parents + parents = [] + for p in ctx.parents(): + parents.append(revmap[p.node()]) + while len(parents) < 2: + parents.append(node.nullid) + + # Generate list of changed files + files = set(ctx.files()) + if node.nullid not in parents: + mc = ctx.manifest() + mp1 = ctx.parents()[0].manifest() + mp2 = ctx.parents()[1].manifest() + files |= (set(mp1) | set(mp2)) - set(mc) + for f in mc: + if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): + files.add(f) + + def getfilectx(repo, memctx, f): + if lfutil.standin(f) in files: + # if the file isn't in the manifest then it was removed + # or renamed, raise IOError to indicate this + try: + fctx = ctx.filectx(lfutil.standin(f)) + except error.LookupError: + raise IOError() + renamed = fctx.renamed() + if renamed: + renamed = lfutil.splitstandin(renamed[0]) + + hash = fctx.data().strip() + path = lfutil.findfile(rsrc, hash) + ### TODO: What if the file is not cached? + data = '' + fd = None + try: + fd = open(path, 'rb') + data = fd.read() + finally: + if fd: fd.close() + return context.memfilectx(f, data, 'l' in fctx.flags(), + 'x' in fctx.flags(), renamed) + else: + try: + fctx = ctx.filectx(f) + except error.LookupError: + raise IOError() + renamed = fctx.renamed() + if renamed: + renamed = renamed[0] + data = fctx.data() + if f == '.hgtags': + newdata = [] + for line in data.splitlines(): + id, name = line.split(' ', 1) + newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]), + name)) + data = ''.join(newdata) + return context.memfilectx(f, data, 'l' in fctx.flags(), + 'x' in fctx.flags(), renamed) + + dstfiles = [] + for file in files: + if lfutil.isstandin(file): + dstfiles.append(lfutil.splitstandin(file)) + else: + dstfiles.append(file) + # Commit + mctx = context.memctx(rdst, parents, ctx.description(), dstfiles, + getfilectx, ctx.user(), ctx.date(), ctx.extra()) + ret = rdst.commitctx(mctx) + rdst.dirstate.setparents(ret) + revmap[ctx.node()] = rdst.changelog.tip() + +def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles, + matcher, size, lfiletohash): + # Convert src parents to dst parents + parents = [] + for p in ctx.parents(): + parents.append(revmap[p.node()]) + while len(parents) < 2: + parents.append(node.nullid) + + # Generate list of changed files + files = set(ctx.files()) + if node.nullid not in parents: + mc = ctx.manifest() + mp1 = ctx.parents()[0].manifest() + mp2 = ctx.parents()[1].manifest() + files |= (set(mp1) | set(mp2)) - set(mc) + for f in mc: + if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None): + files.add(f) + + dstfiles = [] + for f in files: + if f not in lfiles and f not in normalfiles: + islfile = _islfile(f, ctx, matcher, size) + # If this file was renamed or copied then copy + # the lfileness of its predecessor + if f in ctx.manifest(): + fctx = ctx.filectx(f) + renamed = fctx.renamed() + renamedlfile = renamed and renamed[0] in lfiles + islfile |= renamedlfile + if 'l' in fctx.flags(): + if renamedlfile: + raise util.Abort( + _('Renamed/copied largefile %s becomes symlink') % f) + islfile = False + if islfile: + lfiles.add(f) + else: + normalfiles.add(f) + + if f in lfiles: + dstfiles.append(lfutil.standin(f)) + # lfile in manifest if it has not been removed/renamed + if f in ctx.manifest(): + if 'l' in ctx.filectx(f).flags(): + if renamed and renamed[0] in lfiles: + raise util.Abort(_('largefile %s becomes symlink') % f) + + # lfile was modified, update standins + fullpath = rdst.wjoin(f) + lfutil.createdir(os.path.dirname(fullpath)) + m = util.sha1('') + m.update(ctx[f].data()) + hash = m.hexdigest() + if f not in lfiletohash or lfiletohash[f] != hash: + try: + fd = open(fullpath, 'wb') + fd.write(ctx[f].data()) + finally: + if fd: + fd.close() + executable = 'x' in ctx[f].flags() + os.chmod(fullpath, lfutil.getmode(executable)) + lfutil.writestandin(rdst, lfutil.standin(f), hash, + executable) + lfiletohash[f] = hash + else: + # normal file + dstfiles.append(f) + + def getfilectx(repo, memctx, f): + if lfutil.isstandin(f): + # if the file isn't in the manifest then it was removed + # or renamed, raise IOError to indicate this + srcfname = lfutil.splitstandin(f) + try: + fctx = ctx.filectx(srcfname) + except error.LookupError: + raise IOError() + renamed = fctx.renamed() + if renamed: + # standin is always a lfile because lfileness + # doesn't change after rename or copy + renamed = lfutil.standin(renamed[0]) + + return context.memfilectx(f, lfiletohash[srcfname], 'l' in + fctx.flags(), 'x' in fctx.flags(), renamed) + else: + try: + fctx = ctx.filectx(f) + except error.LookupError: + raise IOError() + renamed = fctx.renamed() + if renamed: + renamed = renamed[0] + + data = fctx.data() + if f == '.hgtags': + newdata = [] + for line in data.splitlines(): + id, name = line.split(' ', 1) + newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]), + name)) + data = ''.join(newdata) + return context.memfilectx(f, data, 'l' in fctx.flags(), + 'x' in fctx.flags(), renamed) + + # Commit + mctx = context.memctx(rdst, parents, ctx.description(), dstfiles, + getfilectx, ctx.user(), ctx.date(), ctx.extra()) + ret = rdst.commitctx(mctx) + rdst.dirstate.setparents(ret) + revmap[ctx.node()] = rdst.changelog.tip() + +def _islfile(file, ctx, matcher, size): + ''' + A file is a lfile if it matches a pattern or is over + the given size. + ''' + # Never store hgtags or hgignore as lfiles + if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs': + return False + if matcher and matcher(file): + return True + try: + return ctx.filectx(file).size() >= size * 1024 * 1024 + except error.LookupError: + return False + +def uploadlfiles(ui, rsrc, rdst, files): + '''upload largefiles to the central store''' + + # Don't upload locally. All largefiles are in the system wide cache + # so the other repo can just get them from there. + if not files or rdst.local(): + return + + store = basestore._openstore(rsrc, rdst, put=True) + + at = 0 + files = filter(lambda h: not store.exists(h), files) + for hash in files: + ui.progress(_('uploading largefiles'), at, unit='largefile', total=len(files)) + source = lfutil.findfile(rsrc, hash) + if not source: + raise util.Abort(_('Missing largefile %s needs to be uploaded') % hash) + # XXX check for errors here + store.put(source, hash) + at += 1 + ui.progress('uploading largefiles', None) + +def verifylfiles(ui, repo, all=False, contents=False): + '''Verify that every big file revision in the current changeset + exists in the central store. With --contents, also verify that + the contents of each big file revision are correct (SHA-1 hash + matches the revision ID). With --all, check every changeset in + this repository.''' + if all: + # Pass a list to the function rather than an iterator because we know a + # list will work. + revs = range(len(repo)) + else: + revs = ['.'] + + store = basestore._openstore(repo) + return store.verify(revs, contents=contents) + +def cachelfiles(ui, repo, node): + '''cachelfiles ensures that all largefiles needed by the specified revision + are present in the repository's largefile cache. + + returns a tuple (cached, missing). cached is the list of files downloaded + by this operation; missing is the list of files that were needed but could + not be found.''' + lfiles = lfutil.listlfiles(repo, node) + toget = [] + + for lfile in lfiles: + expectedhash = repo[node][lfutil.standin(lfile)].data().strip() + # if it exists and its hash matches, it might have been locally + # modified before updating and the user chose 'local'. in this case, + # it will not be in any store, so don't look for it. + if (not os.path.exists(repo.wjoin(lfile)) \ + or expectedhash != lfutil.hashfile(repo.wjoin(lfile))) and \ + not lfutil.findfile(repo, expectedhash): + toget.append((lfile, expectedhash)) + + if toget: + store = basestore._openstore(repo) + ret = store.get(toget) + return ret + + return ([], []) + +def updatelfiles(ui, repo, filelist=None, printmessage=True): + wlock = repo.wlock() + try: + lfdirstate = lfutil.openlfdirstate(ui, repo) + lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate) + + if filelist is not None: + lfiles = [f for f in lfiles if f in filelist] + + printed = False + if printmessage and lfiles: + ui.status(_('getting changed largefiles\n')) + printed = True + cachelfiles(ui, repo, '.') + + updated, removed = 0, 0 + for i in map(lambda f: _updatelfile(repo, lfdirstate, f), lfiles): + # increment the appropriate counter according to _updatelfile's + # return value + updated += i > 0 and i or 0 + removed -= i < 0 and i or 0 + if printmessage and (removed or updated) and not printed: + ui.status(_('getting changed largefiles\n')) + printed = True + + lfdirstate.write() + if printed and printmessage: + ui.status(_('%d largefiles updated, %d removed\n') % (updated, + removed)) + finally: + wlock.release() + +def _updatelfile(repo, lfdirstate, lfile): + '''updates a single largefile and copies the state of its standin from + the repository's dirstate to its state in the lfdirstate. + + returns 1 if the file was modified, -1 if the file was removed, 0 if the + file was unchanged, and None if the needed largefile was missing from the + cache.''' + ret = 0 + abslfile = repo.wjoin(lfile) + absstandin = repo.wjoin(lfutil.standin(lfile)) + if os.path.exists(absstandin): + if os.path.exists(absstandin+'.orig'): + shutil.copyfile(abslfile, abslfile+'.orig') + expecthash = lfutil.readstandin(repo, lfile) + if expecthash != '' and \ + (not os.path.exists(abslfile) or \ + expecthash != lfutil.hashfile(abslfile)): + if not lfutil.copyfromcache(repo, expecthash, lfile): + return None # don't try to set the mode or update the dirstate + ret = 1 + mode = os.stat(absstandin).st_mode + if mode != os.stat(abslfile).st_mode: + os.chmod(abslfile, mode) + ret = 1 + else: + if os.path.exists(abslfile): + os.unlink(abslfile) + ret = -1 + state = repo.dirstate[lfutil.standin(lfile)] + if state == 'n': + lfdirstate.normal(lfile) + elif state == 'r': + lfdirstate.remove(lfile) + elif state == 'a': + lfdirstate.add(lfile) + elif state == '?': + try: + # Mercurial >= 1.9 + lfdirstate.drop(lfile) + except AttributeError: + # Mercurial <= 1.8 + lfdirstate.forget(lfile) + return ret + +# -- hg commands declarations ------------------------------------------------ + + +cmdtable = { + 'lfconvert': (lfconvert, + [('s', 'size', 0, 'All files over this size (in megabytes) ' + 'will be considered largefiles. This can also be specified in ' + 'your hgrc as [largefiles].size.'), + ('','tonormal',False, + 'Convert from a largefiles repo to a normal repo')], + _('hg lfconvert SOURCE DEST [FILE ...]')), + }