hgext: add largefiles extension
author: various
Sat, 24 Sep 2011 17:35:45 +0200
changeset 15168 cfccd3bee7b3
parent 15167 8df4166b6f63
child 15169 aa262fff87ac
hgext: add largefiles extension This code has a number of contributors and a complicated history prior to its introduction that can be seen by visiting: https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles http://hg.gerg.ca/hg-bfiles and looking at the included copyright notices and contributors list.
hgext/largefiles/CONTRIBUTORS
hgext/largefiles/__init__.py
hgext/largefiles/basestore.py
hgext/largefiles/design.txt
hgext/largefiles/lfcommands.py
hgext/largefiles/lfutil.py
hgext/largefiles/localstore.py
hgext/largefiles/overrides.py
hgext/largefiles/proto.py
hgext/largefiles/remotestore.py
hgext/largefiles/reposetup.py
hgext/largefiles/uisetup.py
hgext/largefiles/usage.txt
hgext/largefiles/wirestore.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/CONTRIBUTORS	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,4 @@
+Greg Ward, author of the original bfiles extension
+Na'Tosha Bard of Unity Technologies
+Fog Creek Software
+Special thanks to the University of Toronto and the UCOSP program
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/__init__.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,40 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''track large binary files
+
+Large binary files tend to be not very compressible, not very "diffable", and
+not at all mergeable.  Such files are not handled well by Mercurial's storage
+format (revlog), which is based on compressed binary deltas.  largefiles solves
+this problem by adding a centralized client-server layer on top of Mercurial:
+largefiles live in a *central store* out on the network somewhere, and you only
+fetch the ones that you need when you need them.
+
+largefiles works by maintaining a *standin* in .hglf/ for each largefile.  The
+standins are small (41 bytes: an SHA-1 hash plus newline) and are tracked by
+Mercurial.  Largefile revisions are identified by the SHA-1 hash of their
+contents, which is written to the standin.  largefiles uses that revision ID to
+get/put largefile revisions from/to the central store.
+
+A complete tutorial for using largefiles is included in ``usage.txt`` in the largefiles
+source distribution.  See
+https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles/File/usage.txt
+'''
+
+from mercurial import commands
+
+import lfcommands
+import reposetup
+import uisetup
+
+reposetup = reposetup.reposetup
+uisetup = uisetup.uisetup
+
+commands.norepo += " lfconvert"
+
+cmdtable = lfcommands.cmdtable
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/basestore.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,201 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Base class for store implementations and store-related utility code.'''
+
+import os
+import tempfile
+import binascii
+import re
+
+from mercurial import util, node, hg
+from mercurial.i18n import _
+
+import lfutil
+
+class StoreError(Exception):
+    '''Raised when there is a problem getting files from or putting
+    files to a central store.'''
+    def __init__(self, filename, hash, url, detail):
+        self.filename = filename
+        self.hash = hash
+        self.url = url
+        self.detail = detail
+
+    def longmessage(self):
+        if self.url:
+            return ('%s: %s\n'
+                    '(failed URL: %s)\n'
+                    % (self.filename, self.detail, self.url))
+        else:
+            return ('%s: %s\n'
+                    '(no default or default-push path set in hgrc)\n'
+                    % (self.filename, self.detail))
+
+    def __str__(self):
+        return "%s: %s" % (self.url, self.detail)
+
+class basestore(object):
+    def __init__(self, ui, repo, url):
+        self.ui = ui
+        self.repo = repo
+        self.url = url
+
+    def put(self, source, hash):
+        '''Put source file into the store under <filename>/<hash>.'''
+        raise NotImplementedError('abstract method')
+
+    def exists(self, hash):
+        '''Check to see if the store contains the given hash.'''
+        raise NotImplementedError('abstract method')
+
+    def get(self, files):
+        '''Get the specified largefiles from the store and write to local
+        files under repo.root.  files is a list of (filename, hash)
+        tuples.  Return (success, missing), lists of files successfully
+        downloaded and those not found in the store.  success is a list
+        of (filename, hash) tuples; missing is a list of filenames that
+        we could not get.  (The detailed error message will already have
+        been presented to the user, so missing is just supplied as a
+        summary.)'''
+        success = []
+        missing = []
+        ui = self.ui
+
+        at = 0
+        for filename, hash in files:
+            ui.progress(_('getting largefiles'), at, unit='lfile',
+                total=len(files))
+            at += 1
+            ui.note(_('getting %s:%s\n') % (filename, hash))
+
+            cachefilename = lfutil.cachepath(self.repo, hash)
+            cachedir = os.path.dirname(cachefilename)
+
+            # No need to pass mode='wb' to fdopen(), since mkstemp() already
+            # opened the file in binary mode.
+            (tmpfd, tmpfilename) = tempfile.mkstemp(
+                dir=cachedir, prefix=os.path.basename(filename))
+            tmpfile = os.fdopen(tmpfd, 'w')
+
+            try:
+                hhash = binascii.hexlify(self._getfile(tmpfile, filename, hash))
+            except StoreError, err:
+                ui.warn(err.longmessage())
+                hhash = ""
+
+            if hhash != hash:
+                if hhash != "":
+                    ui.warn(_('%s: data corruption (expected %s, got %s)\n')
+                            % (filename, hash, hhash))
+                tmpfile.close() # no-op if it's already closed
+                os.remove(tmpfilename)
+                missing.append(filename)
+                continue
+
+            if os.path.exists(cachefilename): # Windows
+                os.remove(cachefilename)
+            os.rename(tmpfilename, cachefilename)
+            lfutil.linktosystemcache(self.repo, hash)
+            success.append((filename, hhash))
+
+        ui.progress(_('getting largefiles'), None)
+        return (success, missing)
+
+    def verify(self, revs, contents=False):
+        '''Verify the existence (and, optionally, contents) of every big
+        file revision referenced by every changeset in revs.
+        Return 0 if all is well, non-zero on any errors.'''
+        write = self.ui.write
+        failed = False
+
+        write(_('searching %d changesets for largefiles\n') % len(revs))
+        verified = set()                # set of (filename, filenode) tuples
+
+        for rev in revs:
+            cctx = self.repo[rev]
+            cset = "%d:%s" % (cctx.rev(), node.short(cctx.node()))
+
+            failed = lfutil.any_(self._verifyfile(
+                cctx, cset, contents, standin, verified) for standin in cctx)
+
+        num_revs = len(verified)
+        num_lfiles = len(set([fname for (fname, fnode) in verified]))
+        if contents:
+            write(_('verified contents of %d revisions of %d largefiles\n')
+                  % (num_revs, num_lfiles))
+        else:
+            write(_('verified existence of %d revisions of %d largefiles\n')
+                  % (num_revs, num_lfiles))
+
+        return int(failed)
+
+    def _getfile(self, tmpfile, filename, hash):
+        '''Fetch one revision of one file from the store and write it
+        to tmpfile.  Compute the hash of the file on-the-fly as it
+        downloads and return the binary hash.  Close tmpfile.  Raise
+        StoreError if unable to download the file (e.g. it does not
+        exist in the store).'''
+        raise NotImplementedError('abstract method')
+
+    def _verifyfile(self, cctx, cset, contents, standin, verified):
+        '''Perform the actual verification of a file in the store.
+        '''
+        raise NotImplementedError('abstract method')
+
+import localstore, wirestore
+
+_storeprovider = {
+    'file':  [localstore.localstore],
+    'http':  [wirestore.wirestore],
+    'https': [wirestore.wirestore],
+    'ssh': [wirestore.wirestore],
+    }
+
+_scheme_re = re.compile(r'^([a-zA-Z0-9+-.]+)://')
+
+# During clone this function is passed the src's ui object
+# but it needs the dest's ui object so it can read out of
+# the config file. Use repo.ui instead.
+def _openstore(repo, remote=None, put=False):
+    ui = repo.ui
+
+    if not remote:
+        path = getattr(repo, 'lfpullsource', None) or \
+            ui.expandpath('default-push', 'default')
+        # If 'default-push' and 'default' can't be expanded
+        # they are just returned. In that case use the empty string which
+        # use the filescheme.
+        if path == 'default-push' or path == 'default':
+            path = ''
+            remote = repo
+        else:
+            remote = hg.peer(repo, {}, path)
+
+    # The path could be a scheme so use Mercurial's normal functionality
+    # to resolve the scheme to a repository and use its path
+    path = hasattr(remote, 'url') and remote.url() or remote.path
+
+    match = _scheme_re.match(path)
+    if not match:                       # regular filesystem path
+        scheme = 'file'
+    else:
+        scheme = match.group(1)
+
+    try:
+        storeproviders = _storeprovider[scheme]
+    except KeyError:
+        raise util.Abort(_('unsupported URL scheme %r') % scheme)
+
+    for class_obj in storeproviders:
+        try:
+            return class_obj(ui, repo, remote)
+        except lfutil.storeprotonotcapable:
+            pass
+
+    raise util.Abort(_('%s does not appear to be a lfile store'), path)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/design.txt	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,49 @@
+= largefiles - manage large binary files =
+This extension is based on Greg Ward's bfiles extension which can be found
+at http://mercurial.selenic.com/wiki/BfilesExtension.
+
+== The largefile store ==
+
+largefile stores are, in the typical use case, centralized servers that have
+every past revision of a given binary file.  Each largefile is identified by
+its sha1 hash, and all interactions with the store take one of the following
+forms.
+
+-Download a largefile with this hash
+-Upload a largefile with this hash
+-Check if the store has a largefile with this hash
+
+largefiles stores can take one of two forms:
+
+-Directories on a network file share
+-Mercurial wireproto servers, either via ssh or http (hgweb)
+
+== The Local Repository ==
+
+The local repository has a largefile cache in .hg/largefiles which holds a
+subset of the largefiles needed. On a clone only the largefiles at tip are
+downloaded. When largefiles are downloaded from the central store, a copy is
+saved in this store.
+
+== The Global Cache ==
+
+largefiles in a local repository cache are hardlinked to files in the global
+cache. Before a file is downloaded we check if it is in the global cache.
+
+== Implementation Details ==
+
+Each largefile has a standin which is in .hglf. The standin is tracked by
+Mercurial.  The standin contains the SHA1 hash of the largefile. When a
+largefile is added/removed/copied/renamed/etc the same operation is applied to
+the standin. Thus the history of the standin is the history of the largefile.
+
+For performance reasons, the contents of a standin are only updated before a
+commit.  Standins are added/removed/copied/renamed from add/remove/copy/rename
+Mercurial commands but their contents will not be updated. The contents of a
+standin will always be the hash of the largefile as of the last commit. To
+support some commands (revert) some standins are temporarily updated but will
+be changed back after the command is finished.
+
+A Mercurial dirstate object tracks the state of the largefiles. The dirstate
+uses the last modified time and current size to detect if a file has changed
+(without reading the entire contents of the file).
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/lfcommands.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,483 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''High-level command functions: lfadd() et al., plus the cmdtable.'''
+
+import os
+import shutil
+
+from mercurial import util, match as match_, hg, node, context, error
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+# -- Commands ----------------------------------------------------------
+
+def lfconvert(ui, src, dest, *pats, **opts):
+    '''Convert a normal repository to a largefiles repository
+
+    Convert source repository creating an identical repository, except that all
+    files that match the patterns given, or are over the given size will be
+    added as largefiles. The size used to determine whether or not to track a
+    file as a largefile is the size of the first version of the file. After
+    running this command you will need to make sure that largefiles is enabled
+    anywhere you intend to push the new repository.'''
+
+    if opts['tonormal']:
+        tolfile = False
+    else:
+        tolfile = True
+        size = opts['size']
+        if not size:
+            size = ui.config(lfutil.longname, 'size', default=None)
+            try:
+                size = int(size)
+            except ValueError:
+                raise util.Abort(_('largefiles.size must be integer, was %s\n') % \
+                    size)
+            except TypeError:
+                raise util.Abort(_('size must be specified'))
+
+    try:
+        rsrc = hg.repository(ui, src)
+        if not rsrc.local():
+            raise util.Abort(_('%s is not a local Mercurial repo') % src)
+    except error.RepoError, err:
+        ui.traceback()
+        raise util.Abort(err.args[0])
+    if os.path.exists(dest):
+        if not os.path.isdir(dest):
+            raise util.Abort(_('destination %s already exists') % dest)
+        elif os.listdir(dest):
+            raise util.Abort(_('destination %s is not empty') % dest)
+    try:
+        ui.status(_('initializing destination %s\n') % dest)
+        rdst = hg.repository(ui, dest, create=True)
+        if not rdst.local():
+            raise util.Abort(_('%s is not a local Mercurial repo') % dest)
+    except error.RepoError:
+        ui.traceback()
+        raise util.Abort(_('%s is not a repo') % dest)
+
+    try:
+        # Lock destination to prevent modification while it is converted to.
+        # Don't need to lock src because we are just reading from its history
+        # which can't change.
+        dst_lock = rdst.lock()
+
+        # Get a list of all changesets in the source.  The easy way to do this
+        # is to simply walk the changelog, using changelog.nodesbetween().
+        # Take a look at mercurial/revlog.py:639 for more details.
+        # Use a generator instead of a list to decrease memory usage
+        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
+            rsrc.heads())[0])
+        revmap = {node.nullid: node.nullid}
+        if tolfile:
+            lfiles = set()
+            normalfiles = set()
+            if not pats:
+                pats = ui.config(lfutil.longname, 'patterns', default=())
+                if pats:
+                    pats = pats.split(' ')
+            if pats:
+                matcher = match_.match(rsrc.root, '', list(pats))
+            else:
+                matcher = None
+
+            lfiletohash = {}
+            for ctx in ctxs:
+                ui.progress(_('converting revisions'), ctx.rev(),
+                    unit=_('revision'), total=rsrc['tip'].rev())
+                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
+                    lfiles, normalfiles, matcher, size, lfiletohash)
+            ui.progress(_('converting revisions'), None)
+
+            if os.path.exists(rdst.wjoin(lfutil.shortname)):
+                shutil.rmtree(rdst.wjoin(lfutil.shortname))
+
+            for f in lfiletohash.keys():
+                if os.path.isfile(rdst.wjoin(f)):
+                    os.unlink(rdst.wjoin(f))
+                try:
+                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
+                except:
+                    pass
+
+        else:
+            for ctx in ctxs:
+                ui.progress(_('converting revisions'), ctx.rev(),
+                    unit=_('revision'), total=rsrc['tip'].rev())
+                _addchangeset(ui, rsrc, rdst, ctx, revmap)
+
+            ui.progress(_('converting revisions'), None)
+    except:
+        # we failed, remove the new directory
+        shutil.rmtree(rdst.root)
+        raise
+    finally:
+        dst_lock.release()
+
+def _addchangeset(ui, rsrc, rdst, ctx, revmap):
+ # Convert src parents to dst parents
+    parents = []
+    for p in ctx.parents():
+        parents.append(revmap[p.node()])
+    while len(parents) < 2:
+        parents.append(node.nullid)
+
+    # Generate list of changed files
+    files = set(ctx.files())
+    if node.nullid not in parents:
+        mc = ctx.manifest()
+        mp1 = ctx.parents()[0].manifest()
+        mp2 = ctx.parents()[1].manifest()
+        files |= (set(mp1) | set(mp2)) - set(mc)
+        for f in mc:
+            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                files.add(f)
+
+    def getfilectx(repo, memctx, f):
+        if lfutil.standin(f) in files:
+            # if the file isn't in the manifest then it was removed
+            # or renamed, raise IOError to indicate this
+            try:
+                fctx = ctx.filectx(lfutil.standin(f))
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                renamed = lfutil.splitstandin(renamed[0])
+
+            hash = fctx.data().strip()
+            path = lfutil.findfile(rsrc, hash)
+            ### TODO: What if the file is not cached?
+            data = ''
+            fd = None
+            try:
+                fd = open(path, 'rb')
+                data = fd.read()
+            finally:
+                if fd: fd.close()
+            return context.memfilectx(f, data, 'l' in fctx.flags(),
+                                      'x' in fctx.flags(), renamed)
+        else:
+            try:
+                fctx = ctx.filectx(f)
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                renamed = renamed[0]
+            data = fctx.data()
+            if f == '.hgtags':
+                newdata = []
+                for line in data.splitlines():
+                    id, name = line.split(' ', 1)
+                    newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]),
+                        name))
+                data = ''.join(newdata)
+            return context.memfilectx(f, data, 'l' in fctx.flags(),
+                                      'x' in fctx.flags(), renamed)
+
+    dstfiles = []
+    for file in files:
+        if lfutil.isstandin(file):
+            dstfiles.append(lfutil.splitstandin(file))
+        else:
+            dstfiles.append(file)
+    # Commit
+    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
+    ret = rdst.commitctx(mctx)
+    rdst.dirstate.setparents(ret)
+    revmap[ctx.node()] = rdst.changelog.tip()
+
+def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
+        matcher, size, lfiletohash):
+    # Convert src parents to dst parents
+    parents = []
+    for p in ctx.parents():
+        parents.append(revmap[p.node()])
+    while len(parents) < 2:
+        parents.append(node.nullid)
+
+    # Generate list of changed files
+    files = set(ctx.files())
+    if node.nullid not in parents:
+        mc = ctx.manifest()
+        mp1 = ctx.parents()[0].manifest()
+        mp2 = ctx.parents()[1].manifest()
+        files |= (set(mp1) | set(mp2)) - set(mc)
+        for f in mc:
+            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                files.add(f)
+
+    dstfiles = []
+    for f in files:
+        if f not in lfiles and f not in normalfiles:
+            islfile = _islfile(f, ctx, matcher, size)
+            # If this file was renamed or copied then copy
+            # the lfileness of its predecessor
+            if f in ctx.manifest():
+                fctx = ctx.filectx(f)
+                renamed = fctx.renamed()
+                renamedlfile = renamed and renamed[0] in lfiles
+                islfile |= renamedlfile
+                if 'l' in fctx.flags():
+                    if renamedlfile:
+                        raise util.Abort(
+                            _('Renamed/copied largefile %s becomes symlink') % f)
+                    islfile = False
+            if islfile:
+                lfiles.add(f)
+            else:
+                normalfiles.add(f)
+
+        if f in lfiles:
+            dstfiles.append(lfutil.standin(f))
+            # lfile in manifest if it has not been removed/renamed
+            if f in ctx.manifest():
+                if 'l' in ctx.filectx(f).flags():
+                    if renamed and renamed[0] in lfiles:
+                        raise util.Abort(_('largefile %s becomes symlink') % f)
+
+                # lfile was modified, update standins
+                fullpath = rdst.wjoin(f)
+                lfutil.createdir(os.path.dirname(fullpath))
+                m = util.sha1('')
+                m.update(ctx[f].data())
+                hash = m.hexdigest()
+                if f not in lfiletohash or lfiletohash[f] != hash:
+                    try:
+                        fd = open(fullpath, 'wb')
+                        fd.write(ctx[f].data())
+                    finally:
+                        if fd:
+                            fd.close()
+                    executable = 'x' in ctx[f].flags()
+                    os.chmod(fullpath, lfutil.getmode(executable))
+                    lfutil.writestandin(rdst, lfutil.standin(f), hash,
+                        executable)
+                    lfiletohash[f] = hash
+        else:
+            # normal file
+            dstfiles.append(f)
+
+    def getfilectx(repo, memctx, f):
+        if lfutil.isstandin(f):
+            # if the file isn't in the manifest then it was removed
+            # or renamed, raise IOError to indicate this
+            srcfname = lfutil.splitstandin(f)
+            try:
+                fctx = ctx.filectx(srcfname)
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                # standin is always a lfile because lfileness
+                # doesn't change after rename or copy
+                renamed = lfutil.standin(renamed[0])
+
+            return context.memfilectx(f, lfiletohash[srcfname], 'l' in
+                fctx.flags(), 'x' in fctx.flags(), renamed)
+        else:
+            try:
+                fctx = ctx.filectx(f)
+            except error.LookupError:
+                raise IOError()
+            renamed = fctx.renamed()
+            if renamed:
+                renamed = renamed[0]
+
+            data = fctx.data()
+            if f == '.hgtags':
+                newdata = []
+                for line in data.splitlines():
+                    id, name = line.split(' ', 1)
+                    newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]),
+                        name))
+                data = ''.join(newdata)
+            return context.memfilectx(f, data, 'l' in fctx.flags(),
+                                      'x' in fctx.flags(), renamed)
+
+    # Commit
+    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
+    ret = rdst.commitctx(mctx)
+    rdst.dirstate.setparents(ret)
+    revmap[ctx.node()] = rdst.changelog.tip()
+
+def _islfile(file, ctx, matcher, size):
+    '''
+    A file is a lfile if it matches a pattern or is over
+    the given size.
+    '''
+    # Never store hgtags or hgignore as lfiles
+    if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
+        return False
+    if matcher and matcher(file):
+        return True
+    try:
+        return ctx.filectx(file).size() >= size * 1024 * 1024
+    except error.LookupError:
+        return False
+
+def uploadlfiles(ui, rsrc, rdst, files):
+    '''upload largefiles to the central store'''
+
+    # Don't upload locally. All largefiles are in the system wide cache
+    # so the other repo can just get them from there.
+    if not files or rdst.local():
+        return
+
+    store = basestore._openstore(rsrc, rdst, put=True)
+
+    at = 0
+    files = filter(lambda h: not store.exists(h), files)
+    for hash in files:
+        ui.progress(_('uploading largefiles'), at, unit='largefile', total=len(files))
+        source = lfutil.findfile(rsrc, hash)
+        if not source:
+            raise util.Abort(_('Missing largefile %s needs to be uploaded') % hash)
+        # XXX check for errors here
+        store.put(source, hash)
+        at += 1
+    ui.progress('uploading largefiles', None)
+
+def verifylfiles(ui, repo, all=False, contents=False):
+    '''Verify that every big file revision in the current changeset
+    exists in the central store.  With --contents, also verify that
+    the contents of each big file revision are correct (SHA-1 hash
+    matches the revision ID).  With --all, check every changeset in
+    this repository.'''
+    if all:
+        # Pass a list to the function rather than an iterator because we know a
+        # list will work.
+        revs = range(len(repo))
+    else:
+        revs = ['.']
+
+    store = basestore._openstore(repo)
+    return store.verify(revs, contents=contents)
+
+def cachelfiles(ui, repo, node):
+    '''cachelfiles ensures that all largefiles needed by the specified revision
+    are present in the repository's largefile cache.
+
+    returns a tuple (cached, missing).  cached is the list of files downloaded
+    by this operation; missing is the list of files that were needed but could
+    not be found.'''
+    lfiles = lfutil.listlfiles(repo, node)
+    toget = []
+
+    for lfile in lfiles:
+        expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
+        # if it exists and its hash matches, it might have been locally
+        # modified before updating and the user chose 'local'.  in this case,
+        # it will not be in any store, so don't look for it.
+        if (not os.path.exists(repo.wjoin(lfile)) \
+                or expectedhash != lfutil.hashfile(repo.wjoin(lfile))) and \
+                not lfutil.findfile(repo, expectedhash):
+            toget.append((lfile, expectedhash))
+
+    if toget:
+        store = basestore._openstore(repo)
+        ret = store.get(toget)
+        return ret
+
+    return ([], [])
+
+def updatelfiles(ui, repo, filelist=None, printmessage=True):
+    wlock = repo.wlock()
+    try:
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
+
+        if filelist is not None:
+            lfiles = [f for f in lfiles if f in filelist]
+
+        printed = False
+        if printmessage and lfiles:
+            ui.status(_('getting changed largefiles\n'))
+            printed = True
+            cachelfiles(ui, repo, '.')
+
+        updated, removed = 0, 0
+        for i in map(lambda f: _updatelfile(repo, lfdirstate, f), lfiles):
+            # increment the appropriate counter according to _updatelfile's
+            # return value
+            updated += i > 0 and i or 0
+            removed -= i < 0 and i or 0
+            if printmessage and (removed or updated) and not printed:
+                ui.status(_('getting changed largefiles\n'))
+                printed = True
+
+        lfdirstate.write()
+        if printed and printmessage:
+            ui.status(_('%d largefiles updated, %d removed\n') % (updated,
+                removed))
+    finally:
+        wlock.release()
+
+def _updatelfile(repo, lfdirstate, lfile):
+    '''updates a single largefile and copies the state of its standin from
+    the repository's dirstate to its state in the lfdirstate.
+
+    returns 1 if the file was modified, -1 if the file was removed, 0 if the
+    file was unchanged, and None if the needed largefile was missing from the
+    cache.'''
+    ret = 0
+    abslfile = repo.wjoin(lfile)
+    absstandin = repo.wjoin(lfutil.standin(lfile))
+    if os.path.exists(absstandin):
+        if os.path.exists(absstandin+'.orig'):
+            shutil.copyfile(abslfile, abslfile+'.orig')
+        expecthash = lfutil.readstandin(repo, lfile)
+        if expecthash != '' and \
+                (not os.path.exists(abslfile) or \
+                expecthash != lfutil.hashfile(abslfile)):
+            if not lfutil.copyfromcache(repo, expecthash, lfile):
+                return None # don't try to set the mode or update the dirstate
+            ret = 1
+        mode = os.stat(absstandin).st_mode
+        if mode != os.stat(abslfile).st_mode:
+            os.chmod(abslfile, mode)
+            ret = 1
+    else:
+        if os.path.exists(abslfile):
+            os.unlink(abslfile)
+            ret = -1
+    state = repo.dirstate[lfutil.standin(lfile)]
+    if state == 'n':
+        lfdirstate.normal(lfile)
+    elif state == 'r':
+        lfdirstate.remove(lfile)
+    elif state == 'a':
+        lfdirstate.add(lfile)
+    elif state == '?':
+        try:
+            # Mercurial >= 1.9
+            lfdirstate.drop(lfile)
+        except AttributeError:
+            # Mercurial <= 1.8
+            lfdirstate.forget(lfile)
+    return ret
+
+# -- hg commands declarations ------------------------------------------------
+
+
+cmdtable = {
+    'lfconvert': (lfconvert,
+                  [('s', 'size', 0, 'All files over this size (in megabytes) '
+                  'will be considered largefiles. This can also be specified in '
+                  'your hgrc as [largefiles].size.'),
+                  ('','tonormal',False,
+                      'Convert from a largefiles repo to a normal repo')],
+                  _('hg lfconvert SOURCE DEST [FILE ...]')),
+    }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/lfutil.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,502 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''largefiles utility code: must not import other modules in this package.'''
+
+import os
+import errno
+import inspect
+import shutil
+import stat
+import hashlib
+
+from mercurial import cmdutil, dirstate, httpconnection, match as match_, \
+        url as url_, util
+from mercurial.i18n import _
+
+try:
+    from mercurial import scmutil
+except ImportError:
+    pass
+
# Directory (relative to the repo root) that holds the standin files
# which stand in for largefiles in normal Mercurial history.
shortname = '.hglf'
# Name used for the requirements entry, the config section and the
# .hg/largefiles administrative directory.
longname = 'largefiles'
+
+
+# -- Portability wrappers ----------------------------------------------
+
# NOTE(review): the probe inspects dirstate.status() but the wrappers
# call dirstate.walk() -- presumably both methods grew the extra
# argument in the same release; confirm.
if 'subrepos' in inspect.getargspec(dirstate.dirstate.status)[0]:
    # for Mercurial >= 1.5
    def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
        '''Walk the dirstate with matcher, passing an empty subrepos
        list to the newer walk() signature.'''
        return dirstate.walk(matcher, [], unknown, ignored)
else:
    # for Mercurial <= 1.4
    def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
        '''Walk the dirstate with matcher, using the older walk()
        signature without the subrepos argument.'''
        return dirstate.walk(matcher, unknown, ignored)
+
def repo_add(repo, list):
    '''Schedule the given files for addition, using whichever API this
    Mercurial version provides (repo.add on <= 1.5, workingctx.add on
    >= 1.6).'''
    add = getattr(repo, 'add', None)
    if add is None:
        # Mercurial >= 1.6: the add method moved to the workingctx
        add = repo[None].add
    return add(list)
+
def repo_remove(repo, list, unlink=False):
    '''Remove the given files from the repository, deleting them from
    the working directory as well when unlink is true.  Papers over the
    moves of the remove API across Mercurial versions.'''
    try:
        # Mercurial <= 1.5
        remove = repo.remove
    except AttributeError:
        # Mercurial >= 1.6
        try:
            # Mercurial <= 1.8
            remove = repo[None].remove
        except AttributeError:
            # Mercurial >= 1.9: workingctx has no remove(); emulate it
            # by unlinking each file ourselves and then forgetting them,
            # under wlock so the dirstate stays consistent
            def remove(list, unlink):
                wlock = repo.wlock()
                try:
                    if unlink:
                        for f in list:
                            try:
                                util.unlinkpath(repo.wjoin(f))
                            except OSError, inst:
                                # a file that is already gone is fine;
                                # anything else is a real error
                                if inst.errno != errno.ENOENT:
                                    raise
                    repo[None].forget(list)
                finally:
                    wlock.release()

    return remove(list, unlink=unlink)
+
def repo_forget(repo, list):
    '''Stop tracking the given files, using whichever API this Mercurial
    version provides (repo.forget on <= 1.5, workingctx.forget on
    >= 1.6).'''
    forget = getattr(repo, 'forget', None)
    if forget is None:
        # Mercurial >= 1.6: the forget method moved to the workingctx
        forget = repo[None].forget
    return forget(list)
+
def findoutgoing(repo, remote, force):
    '''Return the changesets present locally but missing from remote,
    across the three generations of the discovery API.'''
    # First attempt is for Mercurial <= 1.5 second is for >= 1.6
    try:
        return repo.findoutgoing(remote)
    except AttributeError:
        from mercurial import discovery
        try:
            # Mercurial <= 1.8
            return discovery.findoutgoing(repo, remote, force=force)
        except AttributeError:
            # Mercurial >= 1.9: compute the common set first, then ask
            # the changelog which local nodes the remote is missing
            common, _anyinc, _heads = discovery.findcommonincoming(repo,
                remote, force=force)
            return repo.changelog.findmissing(common)
+
+# -- Private worker functions ------------------------------------------
+
# Pick the platform's hardlink function once at import time.
if os.name == 'nt':
    from mercurial import win32
    # the win32 hardlink helper was renamed between releases; probe for
    # the new name first and fall back to the old one.  Catch only
    # AttributeError -- the original bare "except:" would also have
    # swallowed KeyboardInterrupt, SystemExit etc.
    try:
        linkfn = win32.oslink
    except AttributeError:
        linkfn = win32.os_link
else:
    linkfn = os.link
+
def link(src, dest):
    '''Hardlink src to dest; silently fall back to a plain copy (with
    matching permission bits) when hardlinking fails, e.g. across
    filesystems.'''
    try:
        linkfn(src, dest)
    except OSError:
        # If hardlinks fail fall back on copy
        shutil.copyfile(src, dest)
        os.chmod(dest, os.stat(src).st_mode)
+
def systemcachepath(ui, hash):
    '''Return the path of the given hash inside the user's system-wide
    largefile cache, honoring the [largefiles] systemcache config
    option when it is set.'''
    path = ui.config(longname, 'systemcache', None)
    if path:
        path = os.path.join(path, hash)
    else:
        if os.name == 'nt':
            path = os.path.join(os.getenv('LOCALAPPDATA') or \
                os.getenv('APPDATA'), longname, hash)
        elif os.name == 'posix':
            home = os.getenv('HOME')
            if not home:
                # without $HOME, os.path.join(None, ...) would die with
                # a TypeError; fail with a meaningful message instead
                raise util.Abort(_('unknown home directory for the '
                                   'largefiles system cache'))
            path = os.path.join(home, '.' + longname, hash)
        else:
            raise util.Abort(_('Unknown operating system: %s\n') % os.name)
    return path
+
def insystemcache(ui, hash):
    '''Return True if hash is present in the system-wide cache.'''
    return os.path.exists(systemcachepath(ui, hash))

def findfile(repo, hash):
    '''Return the path of a cached copy of the largefile with the given
    hash, trying the per-repo cache first and the system-wide cache
    second; return None if the file is in neither.'''
    if incache(repo, hash):
        repo.ui.note(_('Found %s in cache\n') % hash)
        return cachepath(repo, hash)
    if insystemcache(repo.ui, hash):
        repo.ui.note(_('Found %s in system cache\n') % hash)
        return systemcachepath(repo.ui, hash)
    return None
+
class largefiles_dirstate(dirstate.dirstate):
    '''dirstate subclass whose entries are keyed by unix-style
    (slash-separated) paths: every incoming path is normalized with
    unixpath() before being handed to the base class.'''
    def __getitem__(self, key):
        return super(largefiles_dirstate, self).__getitem__(unixpath(key))
    def normal(self, f):
        return super(largefiles_dirstate, self).normal(unixpath(f))
    def remove(self, f):
        return super(largefiles_dirstate, self).remove(unixpath(f))
    def add(self, f):
        return super(largefiles_dirstate, self).add(unixpath(f))
    def drop(self, f):
        # drop() only exists on the base dirstate in Mercurial >= 1.9
        return super(largefiles_dirstate, self).drop(unixpath(f))
    def forget(self, f):
        return super(largefiles_dirstate, self).forget(unixpath(f))
+
+def openlfdirstate(ui, repo):
+    '''
+    Return a dirstate object that tracks big files: i.e. its root is the
+    repo root, but it is saved in .hg/largefiles/dirstate.
+    '''
+    admin = repo.join(longname)
+    try:
+        # Mercurial >= 1.9
+        opener = scmutil.opener(admin)
+    except ImportError:
+        # Mercurial <= 1.8
+        opener = util.opener(admin)
+    if hasattr(repo.dirstate, '_validate'):
+        lfdirstate = largefiles_dirstate(opener, ui, repo.root,
+            repo.dirstate._validate)
+    else:
+        lfdirstate = largefiles_dirstate(opener, ui, repo.root)
+
+    # If the largefiles dirstate does not exist, populate and create it.  This
+    # ensures that we create it on the first meaningful largefiles operation in
+    # a new clone.  It also gives us an easy way to forcibly rebuild largefiles
+    # state:
+    #   rm .hg/largefiles/dirstate && hg status
+    # Or even, if things are really messed up:
+    #   rm -rf .hg/largefiles && hg status
+    if not os.path.exists(os.path.join(admin, 'dirstate')):
+        util.makedirs(admin)
+        matcher = getstandinmatcher(repo)
+        for standin in dirstate_walk(repo.dirstate, matcher):
+            lfile = splitstandin(standin)
+            hash = readstandin(repo, lfile)
+            lfdirstate.normallookup(lfile)
+            try:
+                if hash == hashfile(lfile):
+                    lfdirstate.normal(lfile)
+            except IOError, err:
+                if err.errno != errno.ENOENT:
+                    raise
+
+        lfdirstate.write()
+
+    return lfdirstate
+
def lfdirstate_status(lfdirstate, repo, rev):
    '''Resolve the 'unsure' entries of the largefile dirstate by
    comparing on-disk content hashes with the standin hashes at rev, and
    return the final status tuple (without the unsure list).'''
    wlock = repo.wlock()
    try:
        match = match_.always(repo.root, repo.getcwd())
        s = lfdirstate.status(match, [], False, False, False)
        unsure, modified, added, removed, missing, unknown, ignored, clean = s
        for lfile in unsure:
            # the standin's content is the expected hex hash
            if repo[rev][standin(lfile)].data().strip() != \
                    hashfile(repo.wjoin(lfile)):
                modified.append(lfile)
            else:
                clean.append(lfile)
                lfdirstate.normal(lfile)
        lfdirstate.write()
    finally:
        wlock.release()
    return (modified, added, removed, missing, unknown, ignored, clean)
+
def listlfiles(repo, rev=None, matcher=None):
    '''list largefiles in the working copy or specified changeset'''
    if matcher is None:
        matcher = getstandinmatcher(repo)

    lfiles = []
    for f in repo[rev].walk(matcher):
        # in the working directory, skip files the dirstate does not know
        if rev is None and repo.dirstate[f] == '?':
            continue
        lfiles.append(splitstandin(f))
    return lfiles
+
def incache(repo, hash):
    '''Return True if hash is present in the per-repo cache.'''
    return os.path.exists(cachepath(repo, hash))
+
def createdir(dir):
    '''Create dir (and any missing parents); a no-op if it already
    exists.  Unlike the naive exists()+makedirs() sequence, this is
    safe against another process creating the directory in between
    (TOCTOU race).'''
    try:
        os.makedirs(dir)
    except OSError:
        # already existing (possibly created concurrently) is fine;
        # anything else is a real error
        if not os.path.isdir(dir):
            raise
+
def cachepath(repo, hash):
    '''Return the path of the given hash inside the per-repo cache
    (.hg/largefiles/<hash>).'''
    return repo.join(os.path.join(longname, hash))

def copyfromcache(repo, hash, filename):
    '''copyfromcache copies the specified largefile from the repo or system
    cache to the specified location in the repository.  It will not throw an
    exception on failure, as it is meant to be called only after ensuring that
    the needed largefile exists in the cache.'''
    path = findfile(repo, hash)
    if path is None:
        return False
    util.makedirs(os.path.dirname(repo.wjoin(filename)))
    shutil.copy(path, repo.wjoin(filename))
    return True

def copytocache(repo, rev, file, uploaded=False):
    '''Copy the largefile behind file into the caches, unless the hash
    recorded in its standin is already cached.'''
    # NOTE(review): the rev and uploaded parameters are unused here --
    # presumably kept for interface compatibility with callers; confirm.
    hash = readstandin(repo, file)
    if incache(repo, hash):
        return
    copytocacheabsolute(repo, repo.wjoin(file), hash)

def copytocacheabsolute(repo, file, hash):
    '''Store the file at the given absolute path in the per-repo cache
    under hash: hardlink from the system cache when the hash is already
    there, otherwise copy the file in and publish it to the system
    cache.'''
    createdir(os.path.dirname(cachepath(repo, hash)))
    if insystemcache(repo.ui, hash):
        link(systemcachepath(repo.ui, hash), cachepath(repo, hash))
    else:
        shutil.copyfile(file, cachepath(repo, hash))
        os.chmod(cachepath(repo, hash), os.stat(file).st_mode)
        linktosystemcache(repo, hash)

def linktosystemcache(repo, hash):
    '''Hardlink (or copy) the repo-cached largefile into the system-wide
    cache.'''
    createdir(os.path.dirname(systemcachepath(repo.ui, hash)))
    link(cachepath(repo, hash), systemcachepath(repo.ui, hash))
+
def getstandinmatcher(repo, pats=None, opts=None):
    '''Return a match object that applies pats to the standin directory'''
    # avoid the shared mutable-default-argument pitfall
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    standindir = repo.pathto(shortname)
    if pats:
        # patterns supplied: search standin directory relative to current dir
        cwd = repo.getcwd()
        if os.path.isabs(cwd):
            # cwd is an absolute path for hg -R <reponame>
            # work relative to the repository root in this case
            cwd = ''
        pats = [os.path.join(standindir, cwd, pat) for pat in pats]
    elif os.path.isdir(standindir):
        # no patterns: relative to repo root
        pats = [standindir]
    else:
        # no patterns and no standin dir: return matcher that matches nothing
        match = match_.match(repo.root, None, [], exact=True)
        match.matchfn = lambda f: False
        return match
    return getmatcher(repo, pats, opts, showbad=False)
+
def getmatcher(repo, pats=None, opts=None, showbad=True):
    '''Wrapper around scmutil.match() that adds showbad: if false, neuter
    the match object\'s bad() method so it does not print any warnings
    about missing files or directories.'''
    # avoid the shared mutable-default-argument pitfall
    if pats is None:
        pats = []
    if opts is None:
        opts = {}
    try:
        # Mercurial >= 1.9
        match = scmutil.match(repo[None], pats, opts)
    except (ImportError, NameError):
        # Mercurial <= 1.8: when the module-level scmutil import failed,
        # referencing scmutil raises NameError, not ImportError -- catch
        # both so the fallback actually runs
        match = cmdutil.match(repo, pats, opts)

    if not showbad:
        match.bad = lambda f, msg: None
    return match
+
def composestandinmatcher(repo, rmatcher):
    '''Return a matcher that accepts standins corresponding to the files
    accepted by rmatcher. Pass the list of files in the matcher as the
    paths specified by the user.'''
    smatcher = getstandinmatcher(repo, rmatcher.files())
    isstandin = smatcher.matchfn
    # accept f only if it is a standin AND its largefile matches rmatcher
    smatcher.matchfn = lambda f: (isstandin(f) and
                                  rmatcher.matchfn(splitstandin(f)))
    return smatcher
+
def standin(filename):
    '''Return the repo-relative path to the standin for the specified big
    file.'''
    # Notes:
    # 1) Most callers want an absolute path, but _create_standin() needs
    #    it repo-relative so lfadd() can pass it to repo_add().  So leave
    #    it up to the caller to use repo.wjoin() to get an absolute path.
    # 2) Join with '/' because that's what dirstate always uses, even on
    #    Windows. Change existing separator to '/' first in case we are
    #    passed filenames from an external source (like the command line).
    normed = filename.replace(os.sep, '/')
    return '/'.join((shortname, normed))
+
def isstandin(filename):
    '''Return true if filename is a big file standin.  filename must
    be in Mercurial\'s internal form (slash-separated).'''
    # a standin always lives directly under the shortname directory
    return filename.startswith(shortname + '/')
+
def splitstandin(filename):
    '''Return the largefile path for the given standin path, or None when
    filename is not a standin.  The local separator is converted to '/'
    first, since the dirstate always uses '/', even on Windows.'''
    normed = filename.replace(os.sep, '/')
    prefix = shortname + '/'
    if normed.startswith(prefix):
        return normed[len(prefix):]
    return None
+
def updatestandin(repo, standin):
    '''Re-hash the largefile backing standin and rewrite the standin with
    the fresh hash and executable bit; a no-op when the largefile is not
    present in the working directory.'''
    file = repo.wjoin(splitstandin(standin))
    if os.path.exists(file):
        hash = hashfile(file)
        executable = getexecutable(file)
        writestandin(repo, standin, hash, executable)

def readstandin(repo, filename, node=None):
    '''read hex hash from standin for filename at given node, or working
    directory if no node is given'''
    return repo[node][standin(filename)].data().strip()

def writestandin(repo, standin, hash, executable):
    '''write hash to <repo.root>/<standin>'''
    writehash(hash, repo.wjoin(standin), executable)
+
def copyandhash(instream, outfile):
    '''Read bytes from instream (iterable) and write them to outfile,
    computing the SHA-1 hash of the data along the way.  Close outfile
    when done and return the binary hash.'''
    # use hashlib directly for consistency with hexsha1() below; the
    # module is already imported unconditionally at the top of this file
    hasher = hashlib.sha1()
    for data in instream:
        hasher.update(data)
        outfile.write(data)

    # Blecch: closing a file that somebody else opened is rude and
    # wrong.  But it's so darn convenient and practical!  After all,
    # outfile was opened just to copy and hash.
    outfile.close()

    return hasher.digest()
+
def hashrepofile(repo, file):
    '''Return the hex SHA-1 hash of the repo-relative file as found in
    the working directory.'''
    return hashfile(repo.wjoin(file))
+
def hashfile(file):
    '''Return the hex SHA-1 digest of the given file's content, or the
    empty string when the file does not exist.'''
    if not os.path.exists(file):
        return ''
    # hashlib directly, for consistency with hexsha1() below
    hasher = hashlib.sha1()
    fd = open(file, 'rb')
    try:
        # read in fixed-size blocks so huge files never sit in memory
        while True:
            data = fd.read(128 * 1024)
            if not data:
                break
            hasher.update(data)
    finally:
        # close even when a read fails (the original leaked the fd then)
        fd.close()
    return hasher.hexdigest()
+
class limitreader(object):
    '''File-like wrapper that serves at most limit bytes from f, then
    behaves as if at end of file.'''
    def __init__(self, f, limit):
        self.f = f
        self.limit = limit

    def read(self, length):
        '''Read up to length bytes, never exceeding the remaining limit;
        return the empty string once the limit is exhausted.'''
        if self.limit == 0:
            return ''
        length = min(length, self.limit)
        self.limit -= length
        return self.f.read(length)

    def close(self):
        # deliberately do not close the underlying file
        pass
+
def blockstream(infile, blocksize=128 * 1024):
    """Generator that yields blocks of data from infile and closes infile."""
    read = infile.read
    data = read(blocksize)
    while data:
        yield data
        data = read(blocksize)
    # Same blecch as above: we close a file somebody else opened.
    infile.close()
+
def readhash(filename):
    '''Read and return the 40-byte hex hash stored in filename; raise
    util.Abort when the file holds fewer than 40 bytes.'''
    rfile = open(filename, 'rb')
    try:
        hash = rfile.read(40)
    finally:
        # close even when the read fails (the original leaked the fd)
        rfile.close()
    if len(hash) < 40:
        raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
                         % (filename, len(hash)))
    return hash
+
def writehash(hash, filename, executable):
    '''Write hash plus a newline to filename, creating any missing parent
    directories first and setting the permission bits according to
    executable.'''
    util.makedirs(os.path.dirname(filename))
    if os.path.exists(filename):
        os.unlink(filename)
    wfile = open(filename, 'wb')
    try:
        wfile.write(hash)
        wfile.write('\n')
    finally:
        wfile.close()
    if os.path.exists(filename):
        os.chmod(filename, getmode(executable))
+
def getexecutable(filename):
    '''Return True when filename carries the executable bit for user,
    group AND other, else False.'''
    mode = os.stat(filename).st_mode
    # wrap in bool() so callers get a clean flag instead of the raw mode
    # bits that the and-chain would otherwise leak (truthiness unchanged)
    return bool((mode & stat.S_IXUSR) and (mode & stat.S_IXGRP) and
                (mode & stat.S_IXOTH))
+
def getmode(executable):
    '''Return the permission bits for a largefile/standin: 0755
    (rwxr-xr-x) when executable, else 0644 (rw-r--r--).'''
    if executable:
        return 0755
    else:
        return 0644
+
def urljoin(first, second, *arg):
    '''Join URL components so that exactly one slash separates each
    adjacent pair (one trailing plus one leading slash collapse to a
    single slash).'''
    url = first
    for piece in (second,) + arg:
        if not url.endswith('/'):
            url += '/'
        if piece.startswith('/'):
            piece = piece[1:]
        url += piece
    return url
+
def hexsha1(data):
    """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
    object data"""
    h = hashlib.sha1()
    # stream in chunks so large files never sit in memory all at once
    for chunk in util.filechunkiter(data):
        h.update(chunk)
    return h.hexdigest()

def httpsendfile(ui, filename):
    '''Return a file-like object suitable for uploading filename over
    HTTP, papering over the moves of the httpsendfile API.'''
    try:
        # Mercurial >= 1.9
        return httpconnection.httpsendfile(ui, filename, 'rb')
    except ImportError:
        # NOTE(review): on older Mercurial, httpconnection does not
        # exist and the unconditional module-level import above would
        # already have failed; whether ImportError is the exception
        # actually raised here deserves confirmation (NameError or
        # AttributeError seems more likely).
        if 'ui' in inspect.getargspec(url_.httpsendfile.__init__)[0]:
            # Mercurial == 1.8
            return url_.httpsendfile(ui, filename, 'rb')
        else:
            # Mercurial <= 1.7
            return url_.httpsendfile(filename, 'rb')
+
# Convert a path to a unix style path. This is used to give a
# canonical path to the lfdirstate.
def unixpath(path):
    '''Normalize path and convert the OS separator to forward slashes.'''
    return os.path.normpath(path).replace(os.sep, '/')

def islfilesrepo(repo):
    '''Return True when repo declares the largefiles requirement and its
    store actually contains at least one standin revlog.'''
    return 'largefiles' in repo.requirements and any_(shortname+'/' in f[0] for f in
        repo.store.datafiles())

def any_(gen):
    '''Backport of the any() builtin, which only exists in Python >= 2.5;
    do not replace with the builtin while older Pythons are supported.'''
    for x in gen:
        if x:
            return True
    return False

class storeprotonotcapable(BaseException):
    '''Raised when no store class supports any of the required protocols.'''
    # NOTE(review): deriving from BaseException (not Exception) means a
    # generic "except Exception" will not catch this -- confirm that is
    # intentional.  BaseException.__init__ is not called, so .args stays
    # empty.
    def __init__(self, storetypes):
        # the protocols the remote store would have needed to speak
        self.storetypes = storetypes
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/localstore.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,71 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Store class for local filesystem.'''
+
+import os
+
+from mercurial import util
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
class localstore(basestore.basestore):
    '''Because there is a system wide cache, the local store always uses that
    cache.  Since the cache is updated elsewhere, we can just read from it here
    as if it were the store.'''

    def __init__(self, ui, repo, remote):
        # the "remote" repository lives on local disk; its largefile
        # store is <remote>/.hg/largefiles
        url = os.path.join(remote.path, '.hg', lfutil.longname)
        super(localstore, self).__init__(ui, repo, util.expandpath(url))

    def put(self, source, filename, hash):
        '''Any file that is put must already be in the system wide cache so do
        nothing.'''
        return

    def exists(self, hash):
        # NOTE(review): this consults self.repo.ui while _getfile and
        # _verifyfile use self.ui -- presumably equivalent; confirm
        # against basestore's attributes.
        return lfutil.insystemcache(self.repo.ui, hash)

    def _getfile(self, tmpfile, filename, hash):
        '''Return a local path for the wanted largefile, or raise
        StoreError when it is not in the system cache.'''
        if lfutil.insystemcache(self.ui, hash):
            return lfutil.systemcachepath(self.ui, hash)
        raise basestore.StoreError(filename, hash, '',
            _("Can't get file locally"))

    def _verifyfile(self, cctx, cset, contents, standin, verified):
        '''Verify one standin of changeset cset: warn about a missing
        largefile and, when contents is true, about a content/hash
        mismatch.  Return True on failure, False when the file is OK or
        was already verified.'''
        filename = lfutil.splitstandin(standin)
        if not filename:
            return False
        fctx = cctx[standin]
        key = (filename, fctx.filenode())
        if key in verified:
            return False

        # a standin's content is the expected 40-char hex hash
        expecthash = fctx.data()[0:40]
        verified.add(key)
        if not lfutil.insystemcache(self.ui, expecthash):
            self.ui.warn(
                _('changeset %s: %s missing\n'
                  '  (looked for hash %s)\n')
                % (cset, filename, expecthash))
            return True                 # failed

        if contents:
            storepath = lfutil.systemcachepath(self.ui, expecthash)
            actualhash = lfutil.hashfile(storepath)
            if actualhash != expecthash:
                self.ui.warn(
                    _('changeset %s: %s: contents differ\n'
                      '  (%s:\n'
                      '  expected hash %s,\n'
                      '  but got %s)\n')
                    % (cset, filename, storepath, expecthash, actualhash))
                return True             # failed
        return False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/overrides.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,902 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Overridden Mercurial commands and functions for the largefiles extension'''
+
+import os
+import copy
+
+from mercurial import hg, commands, util, cmdutil, match as match_, node, \
+        archival, error, merge
+from mercurial.i18n import _
+from mercurial.node import hex
+from hgext import rebase
+
+try:
+    from mercurial import scmutil
+except ImportError:
+    pass
+
+import lfutil
+import lfcommands
+
def installnormalfilesmatchfn(manifest):
    '''overrides scmutil.match so that the matcher it returns will ignore all
    largefiles'''
    oldmatch = None # for the closure
    def override_match(repo, pats=[], opts={}, globbed=False,
            default='relpath'):
        match = oldmatch(repo, pats, opts, globbed, default)
        m = copy.copy(match)
        # a file is "normal" when it is neither a standin nor a file
        # with a standin recorded in the given manifest
        notlfile = lambda f: not (lfutil.isstandin(f) or lfutil.standin(f) in
                manifest)
        m._files = filter(notlfile, m._files)
        m._fmap = set(m._files)
        orig_matchfn = m.matchfn
        # NOTE(review): the and/or chain yields None (not False) for
        # largefiles; callers presumably only truth-test the result, so
        # this should be harmless -- confirm
        m.matchfn = lambda f: notlfile(f) and orig_matchfn(f) or None
        return m
    oldmatch = installmatchfn(override_match)
+
def installmatchfn(f):
    '''Monkeypatch scmutil.match (cmdutil.match on Mercurial <= 1.8)
    with f, remembering the replaced function on f.oldmatch so
    restorematchfn() can undo the change; return the old function.'''
    try:
        # Mercurial >= 1.9
        oldmatch = scmutil.match
    except (ImportError, NameError):
        # Mercurial <= 1.8: when the module-level scmutil import failed,
        # the name is simply unbound and NameError -- not ImportError --
        # is what actually gets raised, so catch both
        oldmatch = cmdutil.match
    setattr(f, 'oldmatch', oldmatch)
    try:
        # Mercurial >= 1.9
        scmutil.match = f
    except (ImportError, NameError):
        # Mercurial <= 1.8 (same NameError caveat as above)
        cmdutil.match = f
    return oldmatch
+
def restorematchfn():
    '''restores scmutil.match to what it was before installnormalfilesmatchfn
    was called.  no-op if scmutil.match is its original function.

    Note that n calls to installnormalfilesmatchfn will require n calls to
    restore matchfn to reverse'''
    try:
        # Mercurial >= 1.9
        scmutil.match = getattr(scmutil.match, 'oldmatch', scmutil.match)
    except (ImportError, NameError):
        # Mercurial <= 1.8: when the module-level scmutil import failed,
        # referencing scmutil raises NameError, not ImportError -- catch
        # both so the cmdutil fallback actually runs
        cmdutil.match = getattr(cmdutil.match, 'oldmatch', cmdutil.match)
+
+# -- Wrappers: modify existing commands --------------------------------
+
# Add works by going through the files that the user wanted to add
# and checking if they should be added as lfiles. Then making a new
# matcher which matches only the normal files and running the original
# version of add.
def override_add(orig, ui, repo, *pats, **opts):
    '''Version of "hg add" that sends files over the configured size (or
    matching the configured patterns, or forced with --large) through
    the largefiles machinery, then adds the remaining normal files with
    the wrapped command.'''
    large = opts.pop('large', None)

    lfsize = opts.pop('lfsize', None)
    if not lfsize and lfutil.islfilesrepo(repo):
        # default threshold is 10 MB unless [largefiles] size is set
        lfsize = ui.config(lfutil.longname, 'size', default='10')
    if lfsize:
        try:
            lfsize = int(lfsize)
        except ValueError:
            raise util.Abort(_('largefiles: size must be an integer, was %s\n') % lfsize)

    lfmatcher = None
    if os.path.exists(repo.wjoin(lfutil.shortname)):
        lfpats = ui.config(lfutil.longname, 'patterns', default=())
        if lfpats:
            lfpats = lfpats.split(' ')
            lfmatcher = match_.match(repo.root, '', list(lfpats))

    lfnames = []
    try:
        # Mercurial >= 1.9
        # NOTE(review): if the module-level scmutil import failed, this
        # raises NameError which the except below does not catch --
        # confirm against the supported version range
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        # Mercurial <= 1.8
        m = cmdutil.match(repo, pats, opts)
    m.bad = lambda x, y: None
    wctx = repo[None]
    for f in repo.walk(m):
        exact = m.exact(f)
        # is f already tracked, either as a largefile or a normal file?
        lfile = lfutil.standin(f) in wctx
        nfile = f in wctx
        exists = lfile or nfile

        # Don't warn the user when they attempt to add a normal tracked file.
        # The normal add code will do that for us.
        if exact and exists:
            if lfile:
                ui.warn(_('%s already a largefile\n') % f)
            continue

        if exact or not exists:
            # large wins over size threshold wins over pattern match
            if large or (lfsize and os.path.getsize(repo.wjoin(f)) >= \
                    lfsize * 1024 * 1024) or (lfmatcher and lfmatcher(f)):
                lfnames.append(f)
                if ui.verbose or not exact:
                    ui.status(_('adding %s as a largefile\n') % m.rel(f))

    bad = []
    standins = []

    # Need to lock otherwise there could be a race condition inbetween when
    # standins are created and added to the repo
    wlock = repo.wlock()
    try:
        if not opts.get('dry_run'):
            lfdirstate = lfutil.openlfdirstate(ui, repo)
            for f in lfnames:
                # the real hash is filled in later; an empty hash marks
                # a freshly added, not-yet-committed largefile
                standinname = lfutil.standin(f)
                lfutil.writestandin(repo, standinname, hash='',
                    executable=lfutil.getexecutable(repo.wjoin(f)))
                standins.append(standinname)
                if lfdirstate[f] == 'r':
                    lfdirstate.normallookup(f)
                else:
                    lfdirstate.add(f)
            lfdirstate.write()
            # record largefiles the user named explicitly but whose
            # standin could not be added
            bad += [lfutil.splitstandin(f) for f in lfutil.repo_add(repo,
                standins) if f in m.files()]
    finally:
        wlock.release()

    # let the wrapped add only see the normal files
    installnormalfilesmatchfn(repo[None].manifest())
    result = orig(ui, repo, *pats, **opts)
    restorematchfn()

    return (result == 1 or bad) and 1 or 0
+
def override_remove(orig, ui, repo, *pats, **opts):
    '''Version of "hg remove" that lets the wrapped command handle the
    normal files and then removes/forgets the matching largefiles by
    hand, mirroring the wrapped command's --after/--force semantics.'''
    manifest = repo[None].manifest()
    installnormalfilesmatchfn(manifest)
    orig(ui, repo, *pats, **opts)
    restorematchfn()

    after, force = opts.get('after'), opts.get('force')
    if not pats and not after:
        raise util.Abort(_('no files specified'))
    try:
        # Mercurial >= 1.9
        # NOTE(review): a failed module-level scmutil import surfaces
        # here as NameError, which this except does not catch -- confirm
        m = scmutil.match(repo[None], pats, opts)
    except ImportError:
        # Mercurial <= 1.8
        m = cmdutil.match(repo, pats, opts)
    try:
        repo.lfstatus = True
        s = repo.status(match=m, clean=True)
    finally:
        repo.lfstatus = False
    # keep only the status entries that correspond to largefiles
    modified, added, deleted, clean = [[f for f in list if lfutil.standin(f) \
        in manifest] for list in [s[0], s[1], s[3], s[6]]]

    def warn(files, reason):
        # tell the user why each file is being skipped
        for f in files:
            ui.warn(_('not removing %s: file %s (use -f to force removal)\n')
                    % (m.rel(f), reason))

    if force:
        remove, forget = modified + deleted + clean, added
    elif after:
        remove, forget = deleted, []
        warn(modified + added + clean, _('still exists'))
    else:
        remove, forget = deleted + clean, []
        warn(modified, _('is modified'))
        warn(added, _('has been marked for add'))

    for f in sorted(remove + forget):
        if ui.verbose or not m.exact(f):
            ui.status(_('removing %s\n') % m.rel(f))

    # Need to lock because standin files are deleted then removed from the
    # repository and we could race inbetween.
    wlock = repo.wlock()
    try:
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        for f in remove:
            if not after:
                os.unlink(repo.wjoin(f))
                # prune now-empty parent directories
                currentdir = os.path.split(f)[0]
                while currentdir and not os.listdir(repo.wjoin(currentdir)):
                    os.rmdir(repo.wjoin(currentdir))
                    currentdir = os.path.split(currentdir)[0]
            lfdirstate.remove(f)
        lfdirstate.write()

        # apply the same operations to the standins
        forget = [lfutil.standin(f) for f in forget]
        remove = [lfutil.standin(f) for f in remove]
        lfutil.repo_forget(repo, forget)
        lfutil.repo_remove(repo, remove, unlink=True)
    finally:
        wlock.release()
+
def override_status(orig, ui, repo, *pats, **opts):
    '''Run "hg status" with repo.lfstatus set, so the largefiles-aware
    status code is used; the flag is always restored afterwards.'''
    try:
        repo.lfstatus = True
        return orig(ui, repo, *pats, **opts)
    finally:
        repo.lfstatus = False

def override_log(orig, ui, repo, *pats, **opts):
    '''Run "hg log" with repo.lfstatus set, like override_status.'''
    try:
        repo.lfstatus = True
        orig(ui, repo, *pats, **opts)
    finally:
        repo.lfstatus = False
+
def override_verify(orig, ui, repo, *pats, **opts):
    '''Run the normal "hg verify" and, when --large was given and the
    normal verify succeeded, additionally verify the largefiles
    (--lfa: all revisions, --lfc: check contents too).'''
    wantlarge = opts.pop('large', False)
    verifyall = opts.pop('lfa', False)
    checkcontents = opts.pop('lfc', False)

    result = orig(ui, repo, *pats, **opts)
    if wantlarge and not result:
        result = lfcommands.verifylfiles(ui, repo, verifyall, checkcontents)
    return result
+
# Override needs to refresh standins so that update's normal merge
# will go through properly. Then the other update hook (overriding repo.update)
# will get the new files. Filemerge is also overriden so that the merge
# will merge standins correctly.
def override_update(orig, ui, repo, *pats, **opts):
    '''Refresh the standins of changed largefiles before running the
    real "hg update", so its merge sees current hashes; with --check,
    abort when any largefile is locally modified.'''
    lfdirstate = lfutil.openlfdirstate(ui, repo)
    s = lfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False,
        False, False)
    (unsure, modified, added, removed, missing, unknown, ignored, clean) = s

    # Need to lock between the standins getting updated and their lfiles
    # getting updated
    wlock = repo.wlock()
    try:
        if opts['check']:
            mod = len(modified) > 0
            # resolve the 'unsure' entries by hashing the file contents
            for lfile in unsure:
                standin = lfutil.standin(lfile)
                if repo['.'][standin].data().strip() != \
                        lfutil.hashfile(repo.wjoin(lfile)):
                    mod = True
                else:
                    lfdirstate.normal(lfile)
            lfdirstate.write()
            if mod:
                raise util.Abort(_('uncommitted local changes'))
        # XXX handle removed differently
        if not opts['clean']:
            for lfile in unsure + modified + added:
                lfutil.updatestandin(repo, lfutil.standin(lfile))
    finally:
        wlock.release()
    return orig(ui, repo, *pats, **opts)
+
# Override filemerge to prompt the user about how they wish to merge lfiles.
# This will handle identical edits, and copy/rename + edit without prompting
# the user.
def override_filemerge(origfn, repo, mynode, orig, fcd, fco, fca):
    '''Merge a standin: auto-resolve the easy cases (identical content,
    or only one side changed relative to the ancestor) and otherwise ask
    the user whether to keep the local or take the other largefile.'''
    # Use better variable names here. Because this is a wrapper we cannot
    # change the variable names in the function declaration.
    fcdest, fcother, fcancestor = fcd, fco, fca
    if not lfutil.isstandin(orig):
        # not a standin: normal filemerge
        return origfn(repo, mynode, orig, fcdest, fcother, fcancestor)
    else:
        if not fcother.cmp(fcdest): # files identical?
            return None

        # backwards, use working dir parent as ancestor
        if fcancestor == fcother:
            fcancestor = fcdest.parents()[0]

        if orig != fcother.path():
            repo.ui.status(_('merging %s and %s to %s\n')
                           % (lfutil.splitstandin(orig),
                              lfutil.splitstandin(fcother.path()),
                              lfutil.splitstandin(fcdest.path())))
        else:
            repo.ui.status(_('merging %s\n')
                           % lfutil.splitstandin(fcdest.path()))

        # the other side did not change relative to the ancestor: keep ours
        if fcancestor.path() != fcother.path() and fcother.data() == \
                fcancestor.data():
            return 0
        # our side did not change relative to the ancestor: take theirs
        if fcancestor.path() != fcdest.path() and fcdest.data() == \
                fcancestor.data():
            repo.wwrite(fcdest.path(), fcother.data(), fcother.flags())
            return 0

        # genuine conflict: let the user choose a side
        if repo.ui.promptchoice(_('largefile %s has a merge conflict\n'
                             'keep (l)ocal or take (o)ther?') %
                             lfutil.splitstandin(orig),
                             (_('&Local'), _('&Other')), 0) == 0:
            return 0
        else:
            repo.wwrite(fcdest.path(), fcother.data(), fcother.flags())
            return 0
+
+# Copy first changes the matchers to match standins instead of lfiles.
+# Then it overrides util.copyfile in that function: there it checks whether
+# the destination lfile already exists. It also keeps a list of copied files
+# so that the lfiles can be copied and the dirstate updated afterwards.
+def override_copy(orig, ui, repo, pats, opts, rename=False):
+    '''copy/rename wrapper that handles largefiles via their standins.
+
+    Runs the wrapped copy twice: first with a matcher restricted to normal
+    files, then with the matcher rewritten to hit the standins.  The
+    standin copies recorded during the second pass are replayed on the
+    largefiles themselves and the largefile dirstate is updated.  Only
+    aborts with "no files to copy" if both passes found nothing.
+    '''
+    # doesn't remove lfile on rename
+    if len(pats) < 2:
+        # this isn't legal, let the original function deal with it
+        return orig(ui, repo, pats, opts, rename)
+
+    def makestandin(relpath):
+        # map a user-supplied path to the cwd-relative path of its standin
+        try:
+            # Mercurial >= 1.9
+            path = scmutil.canonpath(repo.root, repo.getcwd(), relpath)
+        except ImportError:
+            # Mercurial <= 1.8
+            # NOTE(review): catching ImportError here presumably relies on a
+            # guarded module-level import of scmutil -- TODO confirm
+            path = util.canonpath(repo.root, repo.getcwd(), relpath)
+        return os.path.join(os.path.relpath('.', repo.getcwd()),
+            lfutil.standin(path))
+
+    try:
+        # Mercurial >= 1.9
+        fullpats = scmutil.expandpats(pats)
+    except ImportError:
+        # Mercurial <= 1.8
+        fullpats = cmdutil.expandpats(pats)
+    dest = fullpats[-1]
+
+    if os.path.isdir(dest):
+        # make sure the standin mirror of a directory target exists
+        if not os.path.isdir(makestandin(dest)):
+            os.makedirs(makestandin(dest))
+    # This could copy both lfiles and normal files in one command, but we don't
+    # want to do that first replace their matcher to only match normal files
+    # and run it then replace it to just match lfiles and run it again
+    nonormalfiles = False
+    nolfiles = False
+    try:
+        # pass 1: normal files only
+        installnormalfilesmatchfn(repo[None].manifest())
+        result = orig(ui, repo, pats, opts, rename)
+    except util.Abort, e:
+        if str(e) != 'no files to copy':
+            raise e
+        else:
+            nonormalfiles = True
+        result = 0
+    finally:
+        restorematchfn()
+
+    # The first rename can cause our current working directory to be removed.
+    # In that case there is nothing left to copy/rename so just quit.
+    try:
+        repo.getcwd()
+    except OSError:
+        return result
+
+    try:
+        # When we call orig below it creates the standins but we don't add them
+        # to the dir state until later so lock during that time.
+        wlock = repo.wlock()
+
+        manifest = repo[None].manifest()
+        oldmatch = None # for the closure
+        def override_match(repo, pats=[], opts={}, globbed=False,
+                default='relpath'):
+            newpats = []
+            # The patterns were previously mangled to add the standin
+            # directory; we need to remove that now
+            for pat in pats:
+                if match_.patkind(pat) is None and lfutil.shortname in pat:
+                    newpats.append(pat.replace(lfutil.shortname, ''))
+                else:
+                    newpats.append(pat)
+            match = oldmatch(repo, newpats, opts, globbed, default)
+            m = copy.copy(match)
+            lfile = lambda f: lfutil.standin(f) in manifest
+            m._files = [lfutil.standin(f) for f in m._files if lfile(f)]
+            m._fmap = set(m._files)
+            orig_matchfn = m.matchfn
+            m.matchfn = lambda f: lfutil.isstandin(f) and \
+                lfile(lfutil.splitstandin(f)) and \
+                orig_matchfn(lfutil.splitstandin(f)) or None
+            return m
+        oldmatch = installmatchfn(override_match)
+        listpats = []
+        for pat in pats:
+            if match_.patkind(pat) is not None:
+                listpats.append(pat)
+            else:
+                listpats.append(makestandin(pat))
+
+        try:
+            origcopyfile = util.copyfile
+            copiedfiles = []
+            def override_copyfile(src, dest):
+                # refuse to overwrite an existing largefile unless --force,
+                # and record each standin copy so it can be replayed on the
+                # corresponding largefile afterwards
+                if lfutil.shortname in src and lfutil.shortname in dest:
+                    destlfile = dest.replace(lfutil.shortname, '')
+                    if not opts['force'] and os.path.exists(destlfile):
+                        raise IOError('',
+                            _('destination largefile already exists'))
+                copiedfiles.append((src, dest))
+                origcopyfile(src, dest)
+
+            util.copyfile = override_copyfile
+            # pass 2: standins only
+            result += orig(ui, repo, listpats, opts, rename)
+        finally:
+            util.copyfile = origcopyfile
+
+        # replay the recorded standin copies on the largefiles themselves
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        for (src, dest) in copiedfiles:
+            if lfutil.shortname in src and lfutil.shortname in dest:
+                srclfile = src.replace(lfutil.shortname, '')
+                destlfile = dest.replace(lfutil.shortname, '')
+                destlfiledir = os.path.dirname(destlfile) or '.'
+                if not os.path.isdir(destlfiledir):
+                    os.makedirs(destlfiledir)
+                if rename:
+                    os.rename(srclfile, destlfile)
+                    lfdirstate.remove(os.path.relpath(srclfile,
+                        repo.root))
+                else:
+                    util.copyfile(srclfile, destlfile)
+                lfdirstate.add(os.path.relpath(destlfile,
+                    repo.root))
+        lfdirstate.write()
+    except util.Abort, e:
+        if str(e) != 'no files to copy':
+            raise e
+        else:
+            nolfiles = True
+    finally:
+        restorematchfn()
+        wlock.release()
+
+    if nolfiles and nonormalfiles:
+        raise util.Abort(_('no files to copy'))
+
+    return result
+
+# When the user calls revert, we have to be careful to not revert any changes
+# to other lfiles accidentally.  This means we have to keep track of the lfiles
+# that are being reverted so we only pull down the necessary lfiles.
+#
+# Standins are only updated (to match the hash of lfiles) before commits.
+# Update the standins then run the original revert (changing the matcher to hit
+# standins instead of lfiles). Based on the resulting standins update the
+# lfiles. Then return the standins to their proper state
+def override_revert(orig, ui, repo, *pats, **opts):
+    '''revert wrapper: revert largefiles via their standins.
+
+    Standins only reflect largefile contents at commit time, so the
+    standins of modified largefiles are refreshed first, the wrapped
+    revert runs against the standins, the matched largefiles are updated
+    from the resulting standins, and the standins are finally put back in
+    their normal (committed) state.
+    '''
+    # Because we put the standins in a bad state (by updating them) and then
+    # return them to a correct state we need to lock to prevent others from
+    # changing them in their incorrect state.
+    wlock = repo.wlock()
+    try:
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        (modified, added, removed, missing, unknown, ignored, clean) = \
+            lfutil.lfdirstate_status(lfdirstate, repo, repo['.'].rev())
+        # refresh standins of modified largefiles so revert can see the
+        # uncommitted largefile changes
+        for lfile in modified:
+            lfutil.updatestandin(repo, lfutil.standin(lfile))
+
+        try:
+            ctx = repo[opts.get('rev')]
+            oldmatch = None # for the closure
+            def override_match(ctxorrepo, pats=[], opts={}, globbed=False,
+                    default='relpath'):
+                if hasattr(ctxorrepo, 'match'):
+                    ctx0 = ctxorrepo
+                else:
+                    ctx0 = ctxorrepo[None]
+                match = oldmatch(ctxorrepo, pats, opts, globbed, default)
+                m = copy.copy(match)
+                def tostandin(f):
+                    if lfutil.standin(f) in ctx0 or lfutil.standin(f) in ctx:
+                        return lfutil.standin(f)
+                    elif lfutil.standin(f) in repo[None]:
+                        return None
+                    return f
+                m._files = [tostandin(f) for f in m._files]
+                m._files = [f for f in m._files if f is not None]
+                m._fmap = set(m._files)
+                orig_matchfn = m.matchfn
+                def matchfn(f):
+                    if lfutil.isstandin(f):
+                        # We need to keep track of what lfiles are being
+                        # matched so we know which ones to update later
+                        # (otherwise we revert changes to other lfiles
+                        # accidentally).  This is repo specific, so duckpunch
+                        # the repo object to keep the list of lfiles for us
+                        # later.
+                        if orig_matchfn(lfutil.splitstandin(f)) and \
+                                (f in repo[None] or f in ctx):
+                            lfileslist = getattr(repo, '_lfilestoupdate', [])
+                            lfileslist.append(lfutil.splitstandin(f))
+                            repo._lfilestoupdate = lfileslist
+                            return True
+                        else:
+                            return False
+                    return orig_matchfn(f)
+                m.matchfn = matchfn
+                return m
+            oldmatch = installmatchfn(override_match)
+            try:
+                # Mercurial >= 1.9
+                scmutil.match
+                matches = override_match(repo[None], pats, opts)
+            except ImportError:
+                # Mercurial <= 1.8
+                matches = override_match(repo, pats, opts)
+            orig(ui, repo, *pats, **opts)
+        finally:
+            restorematchfn()
+        # update only the largefiles the revert actually touched
+        lfileslist = getattr(repo, '_lfilestoupdate', [])
+        lfcommands.updatelfiles(ui, repo, filelist=lfileslist, printmessage=False)
+        # Empty out the lfiles list so we start fresh next time
+        repo._lfilestoupdate = []
+        # put the standins of still-modified largefiles back to their
+        # committed (hash) contents
+        for lfile in modified:
+            if lfile in lfileslist:
+                if os.path.exists(repo.wjoin(lfutil.standin(lfile))) and lfile\
+                        in repo['.']:
+                    lfutil.writestandin(repo, lfutil.standin(lfile),
+                        repo['.'][lfile].data().strip(),
+                        'x' in repo['.'][lfile].flags())
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        # drop standins of largefiles that were added and are now reverted
+        for lfile in added:
+            standin = lfutil.standin(lfile)
+            if standin not in ctx and (standin in matches or opts.get('all')):
+                if lfile in lfdirstate:
+                    try:
+                        # Mercurial >= 1.9
+                        lfdirstate.drop(lfile)
+                    except AttributeError:
+                        # Mercurial <= 1.8
+                        lfdirstate.forget(lfile)
+                util.unlinkpath(repo.wjoin(standin))
+        lfdirstate.write()
+    finally:
+        wlock.release()
+
+def hg_update(orig, repo, node):
+    result = orig(repo, node)
+    # XXX check if it worked first
+    lfcommands.updatelfiles(repo.ui, repo)
+    return result
+
+def hg_clean(orig, repo, node, show_stats=True):
+    result = orig(repo, node, show_stats)
+    lfcommands.updatelfiles(repo.ui, repo)
+    return result
+
+def hg_merge(orig, repo, node, force=None, remind=True):
+    result = orig(repo, node, force, remind)
+    lfcommands.updatelfiles(repo.ui, repo)
+    return result
+
+# When we rebase a repository with remotely changed lfiles, we need
+# to take some extra care so that the lfiles are correctly updated
+# in the working copy
+def override_pull(orig, ui, repo, source=None, **opts):
+    '''pull wrapper.
+
+    With --rebase, runs the pull/rebase sequence itself with
+    repo._isrebasing set so largefiles are updated correctly once the
+    remote changes land; otherwise it just records the pull source (for
+    locating the largefile store later) and delegates to the wrapped pull.
+    '''
+    if opts.get('rebase', False):
+        repo._isrebasing = True
+        try:
+            if opts.get('update'):
+                 del opts['update']
+                 ui.debug('--update and --rebase are not compatible, ignoring '
+                          'the update flag\n')
+            del opts['rebase']
+            try:
+                # Mercurial >= 1.9
+                cmdutil.bailifchanged(repo)
+            except AttributeError:
+                # Mercurial <= 1.8
+                cmdutil.bail_if_changed(repo)
+            revsprepull = len(repo)
+            origpostincoming = commands.postincoming
+            def _dummy(*args, **kwargs):
+                pass
+            # suppress the post-incoming working copy update; the rebase
+            # below moves the working copy itself
+            commands.postincoming = _dummy
+            # remember where we pulled from -- presumably consumed when
+            # locating the remote largefile store (TODO confirm in lfutil)
+            repo.lfpullsource = source
+            if not source:
+                source = 'default'
+            try:
+                result = commands.pull(ui, repo, source, **opts)
+            finally:
+                commands.postincoming = origpostincoming
+            revspostpull = len(repo)
+            if revspostpull > revsprepull:
+                # something came in: rebase onto it
+                result = result or rebase.rebase(ui, repo)
+        finally:
+            repo._isrebasing = False
+    else:
+        repo.lfpullsource = source
+        if not source:
+            source = 'default'
+        result = orig(ui, repo, source, **opts)
+    return result
+
+def override_rebase(orig, ui, repo, **opts):
+    repo._isrebasing = True
+    try:
+        orig(ui, repo, **opts)
+    finally:
+        repo._isrebasing = False
+
+def override_archive(orig, repo, dest, node, kind, decode=True, matchfn=None,
+            prefix=None, mtime=None, subrepos=None):
+    '''archive wrapper: put real largefile contents in the archive
+    instead of standins.
+
+    The largefiles of the archived revision are fetched into the cache
+    first; each standin in the archive is then replaced by the file found
+    through lfutil.findfile.  Handles both the old archiver signature
+    (which takes the prefix) and the newer one where the caller applies
+    the prefix.
+    '''
+    # No need to lock because we are only reading history and lfile caches
+    # neither of which are modified
+
+    lfcommands.cachelfiles(repo.ui, repo, node)
+
+    if kind not in archival.archivers:
+        raise util.Abort(_("unknown archive type '%s'") % kind)
+
+    ctx = repo[node]
+
+    # In Mercurial <= 1.5 the prefix is passed to the archiver so try that
+    # if that doesn't work we are probably in Mercurial >= 1.6 where the
+    # prefix is not handled by the archiver
+    try:
+        archiver = archival.archivers[kind](dest, prefix, mtime or \
+                ctx.date()[0])
+
+        def write(name, mode, islink, getdata):
+            if matchfn and not matchfn(name):
+                return
+            data = getdata()
+            if decode:
+                data = repo.wwritedata(name, data)
+            archiver.addfile(name, mode, islink, data)
+    except TypeError:
+        if kind == 'files':
+            if prefix:
+                raise util.Abort(
+                    _('cannot give prefix when archiving to files'))
+        else:
+            prefix = archival.tidyprefix(dest, kind, prefix)
+
+        def write(name, mode, islink, getdata):
+            if matchfn and not matchfn(name):
+                return
+            data = getdata()
+            if decode:
+                data = repo.wwritedata(name, data)
+            archiver.addfile(prefix + name, mode, islink, data)
+
+        archiver = archival.archivers[kind](dest, mtime or ctx.date()[0])
+
+    if repo.ui.configbool("ui", "archivemeta", True):
+        # reproduce core archive's .hg_archival.txt metadata file
+        def metadata():
+            base = 'repo: %s\nnode: %s\nbranch: %s\n' % (
+                hex(repo.changelog.node(0)), hex(node), ctx.branch())
+
+            tags = ''.join('tag: %s\n' % t for t in ctx.tags()
+                           if repo.tagtype(t) == 'global')
+            if not tags:
+                repo.ui.pushbuffer()
+                opts = {'template': '{latesttag}\n{latesttagdistance}',
+                        'style': '', 'patch': None, 'git': None}
+                cmdutil.show_changeset(repo.ui, repo, opts).show(ctx)
+                ltags, dist = repo.ui.popbuffer().split('\n')
+                tags = ''.join('latesttag: %s\n' % t for t in ltags.split(':'))
+                tags += 'latesttagdistance: %s\n' % dist
+
+            return base + tags
+
+        write('.hg_archival.txt', 0644, False, metadata)
+
+    for f in ctx:
+        ff = ctx.flags(f)
+        getdata = ctx[f].data
+        if lfutil.isstandin(f):
+            # substitute the cached largefile for its standin
+            path = lfutil.findfile(repo, getdata().strip())
+            f = lfutil.splitstandin(f)
+
+            # NOTE(review): if open() raises here (or findfile returned
+            # None above), the finally clause hits an unbound 'fd' and a
+            # NameError masks the real error -- worth hardening.
+            def getdatafn():
+                try:
+                    fd = open(path, 'rb')
+                    return fd.read()
+                finally:
+                    fd.close()
+
+            getdata = getdatafn
+        write(f, 'x' in ff and 0755 or 0644, 'l' in ff, getdata)
+
+    if subrepos:
+        for subpath in ctx.substate:
+            sub = ctx.sub(subpath)
+            try:
+                # newer Mercurial passes ui to sub.archive
+                sub.archive(repo.ui, archiver, prefix)
+            except TypeError:
+                sub.archive(archiver, prefix)
+
+    archiver.done()
+
+# If a lfile is modified the change is not reflected in its standin until a
+# commit.  cmdutil.bailifchanged raises an exception if the repo has
+# uncommitted changes.  Wrap it to also check if lfiles were changed. This is
+# used by bisect and backout.
+def override_bailifchanged(orig, repo):
+    orig(repo)
+    repo.lfstatus = True
+    modified, added, removed, deleted = repo.status()[:4]
+    repo.lfstatus = False
+    if modified or added or removed or deleted:
+        raise util.Abort(_('outstanding uncommitted changes'))
+
+# Fetch doesn't use cmdutil.bail_if_changed so override it to add the check
+def override_fetch(orig, ui, repo, *pats, **opts):
+    repo.lfstatus = True
+    modified, added, removed, deleted = repo.status()[:4]
+    repo.lfstatus = False
+    if modified or added or removed or deleted:
+        raise util.Abort(_('outstanding uncommitted changes'))
+    return orig(ui, repo, *pats, **opts)
+
+def override_forget(orig, ui, repo, *pats, **opts):
+    '''forget wrapper: forget largefiles as well as normal files.
+
+    Runs the wrapped forget for normal files only, then computes the
+    matched largefiles itself, removes them from the largefile dirstate
+    and removes their standins from the repository.
+    '''
+    installnormalfilesmatchfn(repo[None].manifest())
+    orig(ui, repo, *pats, **opts)
+    restorematchfn()
+    try:
+        # Mercurial >= 1.9
+        m = scmutil.match(repo[None], pats, opts)
+    except ImportError:
+        # Mercurial <= 1.8
+        m = cmdutil.match(repo, pats, opts)
+
+    try:
+        repo.lfstatus = True
+        s = repo.status(match=m, clean=True)
+    finally:
+        repo.lfstatus = False
+    # candidates: modified (s[0]), added (s[1]), deleted (s[3]) and
+    # clean (s[6]) files that are tracked as largefiles
+    forget = sorted(s[0] + s[1] + s[3] + s[6])
+    forget = [f for f in forget if lfutil.standin(f) in repo[None].manifest()]
+
+    for f in forget:
+        if lfutil.standin(f) not in repo.dirstate and not \
+                os.path.isdir(m.rel(lfutil.standin(f))):
+            ui.warn(_('not removing %s: file is already untracked\n')
+                    % m.rel(f))
+
+    for f in forget:
+        if ui.verbose or not m.exact(f):
+            ui.status(_('removing %s\n') % m.rel(f))
+
+    # Need to lock because standin files are deleted then removed from the
+    # repository and we could race inbetween.
+    wlock = repo.wlock()
+    try:
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        for f in forget:
+            if lfdirstate[f] == 'a':
+                lfdirstate.drop(f)
+            else:
+                lfdirstate.remove(f)
+        lfdirstate.write()
+        lfutil.repo_remove(repo, [lfutil.standin(f) for f in forget],
+            unlink=True)
+    finally:
+        wlock.release()
+
+def getoutgoinglfiles(ui, repo, dest=None, **opts):
+    '''Return the set of standins for largefiles that would be pushed to
+    dest, or None if the remote repository cannot be reached or nothing
+    is outgoing.
+
+    For merge changesets, files differing from either parent manifest are
+    considered as well, since ctx.files() does not report them all.
+    '''
+    dest = ui.expandpath(dest or 'default-push', dest or 'default')
+    dest, branches = hg.parseurl(dest, opts.get('branch'))
+    revs, checkout = hg.addbranchrevs(repo, repo, branches, opts.get('rev'))
+    if revs:
+        revs = [repo.lookup(rev) for rev in revs]
+
+    # Mercurial <= 1.5 had remoteui in cmdutil, then it moved to hg
+    try:
+        remoteui = cmdutil.remoteui
+    except AttributeError:
+        remoteui = hg.remoteui
+
+    try:
+        remote = hg.repository(remoteui(repo, opts), dest)
+    except error.RepoError:
+        return None
+    o = lfutil.findoutgoing(repo, remote, False)
+    if not o:
+        return None
+    o = repo.changelog.nodesbetween(o, revs)[0]
+    if opts.get('newest_first'):
+        o.reverse()
+
+    toupload = set()
+    for n in o:
+        parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
+        ctx = repo[n]
+        files = set(ctx.files())
+        if len(parents) == 2:
+            # merge changeset: add everything that differs from either
+            # parent manifest
+            mc = ctx.manifest()
+            mp1 = ctx.parents()[0].manifest()
+            mp2 = ctx.parents()[1].manifest()
+            for f in mp1:
+                if f not in mc:
+                        files.add(f)
+            for f in mp2:
+                if f not in mc:
+                    files.add(f)
+            for f in mc:
+                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                    files.add(f)
+        # only standins that still exist in the changeset matter
+        toupload = toupload.union(set([f for f in files if lfutil.isstandin(f)\
+            and f in ctx]))
+    return toupload
+
+def override_outgoing(orig, ui, repo, dest=None, **opts):
+    orig(ui, repo, dest, **opts)
+
+    if opts.pop('large', None):
+        toupload = getoutgoinglfiles(ui, repo, dest, **opts)
+        if toupload is None:
+            ui.status(_('largefiles: No remote repo\n'))
+        else:
+            ui.status(_('largefiles to upload:\n'))
+            for file in toupload:
+                ui.status(lfutil.splitstandin(file) + '\n')
+            ui.status('\n')
+
+def override_summary(orig, ui, repo, *pats, **opts):
+    orig(ui, repo, *pats, **opts)
+
+    if opts.pop('large', None):
+        toupload = getoutgoinglfiles(ui, repo, None, **opts)
+        if toupload is None:
+            ui.status(_('largefiles: No remote repo\n'))
+        else:
+            ui.status(_('largefiles: %d to upload\n') % len(toupload))
+
+def override_addremove(orig, ui, repo, *pats, **opts):
+    # Check if the parent or child has lfiles if they do don't allow it.  If
+    # there is a symlink in the manifest then getting the manifest throws an
+    # exception catch it and let addremove deal with it. This happens in
+    # Mercurial's test test-addremove-symlink
+    try:
+        manifesttip = set(repo['tip'].manifest())
+    except util.Abort:
+        manifesttip = set()
+    try:
+        manifestworking = set(repo[None].manifest())
+    except util.Abort:
+        manifestworking = set()
+
+    # Manifests are only iterable so turn them into sets then union
+    for file in manifesttip.union(manifestworking):
+        if file.startswith(lfutil.shortname):
+            raise util.Abort(
+                _('addremove cannot be run on a repo with largefiles'))
+
+    return orig(ui, repo, *pats, **opts)
+
+# Calling purge with --all will cause the lfiles to be deleted.
+# Override repo.status to prevent this from happening.
+def override_purge(orig, ui, repo, *dirs, **opts):
+    oldstatus = repo.status
+    def override_status(node1='.', node2=None, match=None, ignored=False,
+                        clean=False, unknown=False, listsubrepos=False):
+        r = oldstatus(node1, node2, match, ignored, clean, unknown,
+                      listsubrepos)
+        lfdirstate = lfutil.openlfdirstate(ui, repo)
+        modified, added, removed, deleted, unknown, ignored, clean = r
+        unknown = [f for f in unknown if lfdirstate[f] == '?']
+        ignored = [f for f in ignored if lfdirstate[f] == '?']
+        return modified, added, removed, deleted, unknown, ignored, clean
+    repo.status = override_status
+    orig(ui, repo, *dirs, **opts)
+    repo.status = oldstatus
+
+def override_rollback(orig, ui, repo, **opts):
+    result = orig(ui, repo, **opts)
+    merge.update(repo, node=None, branchmerge=False, force=True,
+        partial=lfutil.isstandin)
+    lfdirstate = lfutil.openlfdirstate(ui, repo)
+    lfiles = lfutil.listlfiles(repo)
+    oldlfiles = lfutil.listlfiles(repo, repo[None].parents()[0].rev())
+    for file in lfiles:
+        if file in oldlfiles:
+            lfdirstate.normallookup(file)
+        else:
+            lfdirstate.add(file)
+    lfdirstate.write()
+    return result
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/proto.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,161 @@
+# Copyright 2011 Fog Creek Software
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+import os
+import tempfile
+import urllib2
+
+from mercurial import error, httprepo, util, wireproto
+from mercurial.i18n import _
+
+import lfutil
+
+LARGEFILES_REQUIRED_MSG = '\nThis repository uses the largefiles extension.' \
+                          '\n\nPlease enable it in your Mercurial config ' \
+                          'file.\n'
+
+def putlfile(repo, proto, sha):
+    """putlfile puts a largefile into a repository's local cache and into the
+    system cache.
+
+    Returns wireproto.pushres(0) on success and pushres(1) when the
+    received data does not hash to sha or cannot be stored."""
+    f = None
+    proto.redirect()
+    try:
+        # nested try blocks: a single try/except/finally statement needs
+        # Python >= 2.5
+        try:
+            f = tempfile.NamedTemporaryFile(mode='wb+', prefix='hg-putlfile-')
+            proto.getfile(f)
+            f.seek(0)
+            # reject uploads whose contents do not match the advertised sha
+            if sha != lfutil.hexsha1(f):
+                return wireproto.pushres(1)
+            lfutil.copytocacheabsolute(repo, f.name, sha)
+        except IOError:
+            repo.ui.warn(
+                _('error: could not put received data into largefile store'))
+            return wireproto.pushres(1)
+    finally:
+        if f:
+            f.close()
+
+    return wireproto.pushres(0)
+
+def getlfile(repo, proto, sha):
+    """getlfile retrieves a largefile from the repository-local cache or system
+    cache."""
+    filename = lfutil.findfile(repo, sha)
+    if not filename:
+        raise util.Abort(_('requested largefile %s not present in cache') % sha)
+    f = open(filename, 'rb')
+    length = os.fstat(f.fileno())[6]
+    # since we can't set an HTTP content-length header here, and mercurial core
+    # provides no way to give the length of a streamres (and reading the entire
+    # file into RAM would be ill-advised), we just send the length on the first
+    # line of the response, like the ssh proto does for string responses.
+    def generator():
+        yield '%d\n' % length
+        for chunk in f:
+            yield chunk
+    return wireproto.streamres(generator())
+
+def statlfile(repo, proto, sha):
+    """statlfile sends '2\n' if the largefile is missing, '1\n' if it has a
+    mismatched checksum, or '0\n' if it is in good condition"""
+    filename = lfutil.findfile(repo, sha)
+    if not filename:
+        return '2\n'
+    fd = None
+    try:
+        fd = open(filename, 'rb')
+        return lfutil.hexsha1(fd) == sha and '0\n' or '1\n'
+    finally:
+        if fd:
+            fd.close()
+
+def wirereposetup(ui, repo):
+    '''Add the largefile transfer methods (putlfile/getlfile/statlfile)
+    to a wire repository class so clients can talk to largefiles-aware
+    servers.'''
+    class lfileswirerepository(repo.__class__):
+        def putlfile(self, sha, fd):
+            # returns 0 on success, 1 on failure (mirrors the server's
+            # pushres codes)
+            # unfortunately, httprepository._callpush tries to convert its
+            # input file-like into a bundle before sending it, so we can't use
+            # it ...
+            if issubclass(self.__class__, httprepo.httprepository):
+                try:
+                    return int(self._call('putlfile', data=fd, sha=sha,
+                        headers={'content-type':'application/mercurial-0.1'}))
+                except (ValueError, urllib2.HTTPError):
+                    return 1
+            # ... but we can't use sshrepository._call because the data=
+            # argument won't get sent, and _callpush does exactly what we want
+            # in this case: send the data straight through
+            else:
+                try:
+                    ret, output = self._callpush("putlfile", fd, sha=sha)
+                    if ret == "":
+                        raise error.ResponseError(_('putlfile failed:'),
+                                output)
+                    return int(ret)
+                except IOError:
+                    return 1
+                except ValueError:
+                    raise error.ResponseError(
+                        _('putlfile failed (unexpected response):'), ret)
+
+        def getlfile(self, sha):
+            # returns (length, stream); the length is sent by the server on
+            # the first line of the response (see proto.getlfile)
+            stream = self._callstream("getlfile", sha=sha)
+            length = stream.readline()
+            try:
+                length = int(length)
+            except ValueError:
+                self._abort(error.ResponseError(_("unexpected response:"), length))
+            return (length, stream)
+
+        def statlfile(self, sha):
+            try:
+                return int(self._call("statlfile", sha=sha))
+            except (ValueError, urllib2.HTTPError):
+                # if the server returns anything but an integer followed by a
+                # newline, it's not speaking our language; if we get
+                # an HTTP error, we can't be sure the largefile is present;
+                # either way, consider it missing
+                return 2
+
+    repo.__class__ = lfileswirerepository
+
+# advertise the largefiles=serve capability
+def capabilities(repo, proto):
+    '''Wrapped capabilities wireproto command: append largefiles=serve.
+    capabilities_orig is presumably bound to the original implementation
+    during extension setup -- TODO confirm in uisetup.'''
+    return capabilities_orig(repo, proto) + ' largefiles=serve'
+
+# duplicate what Mercurial's new out-of-band errors mechanism does, because
+# clients old and new alike both handle it well
+def webproto_refuseclient(self, message):
+    '''Refuse an HTTP client by replying with an application/hg-error
+    body carrying the message.'''
+    self.req.header([('Content-Type', 'application/hg-error')])
+    return message
+
+def sshproto_refuseclient(self, message):
+    '''Refuse an ssh client: write the message and a "-" terminator line
+    to stderr, and send an empty response on stdout.'''
+    self.ui.write_err('%s\n-\n' % message)
+    self.fout.write('\n')
+    self.fout.flush()
+
+    return ''
+
+def heads(repo, proto):
+    '''Wrapped heads wireproto command: if the served repository uses
+    largefiles, refuse clients that asked plain 'heads' (largefiles-aware
+    clients ask 'lheads' instead); otherwise behave like core heads.'''
+    if lfutil.islfilesrepo(repo):
+        try:
+            # Mercurial >= f4522df38c65
+            return wireproto.ooberror(LARGEFILES_REQUIRED_MSG)
+        except AttributeError:
+            return proto.refuseclient(LARGEFILES_REQUIRED_MSG)
+    return wireproto.heads(repo, proto)
+
+def sshrepo_callstream(self, cmd, **args):
+    if cmd == 'heads' and self.capable('largefiles'):
+        cmd = 'lheads'
+    if cmd == 'batch' and self.capable('largefiles'):
+        args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
+    return ssh_oldcallstream(self, cmd, **args)
+
+def httprepo_callstream(self, cmd, **args):
+    if cmd == 'heads' and self.capable('largefiles'):
+        cmd = 'lheads'
+    if cmd == 'batch' and self.capable('largefiles'):
+        args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
+    return http_oldcallstream(self, cmd, **args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/remotestore.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,106 @@
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Remote largefile store; the base class for servestore'''
+
+import urllib2
+from urllib2 import HTTPError
+
+from mercurial import util
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+class remotestore(basestore.basestore):
+    """A largefile store accessed over a network"""
+    def __init__(self, ui, repo, url):
+        super(remotestore, self).__init__(ui, repo, url)
+
+    def put(self, source, hash):
+        if self._verify(hash):
+            return
+        if self.sendfile(source, hash):
+            raise util.Abort(
+                _('remotestore: could not put %s to remote store %s')
+                % (source, self.url))
+        self.ui.debug(
+            _('remotestore: put %s to remote store %s') % (source, self.url))
+
+    def exists(self, hash):
+        return self._verify(hash)
+
+    def sendfile(self, filename, hash):
+        self.ui.debug('remotestore: sendfile(%s, %s)\n' % (filename, hash))
+        fd = None
+        try:
+            try:
+                fd = lfutil.httpsendfile(self.ui, filename)
+            except IOError, e:
+                raise util.Abort(
+                    _('remotestore: could not open file %s: %s')
+                    % (filename, str(e)))
+            return self._put(hash, fd)
+        finally:
+            if fd:
+                fd.close()
+
+    def _getfile(self, tmpfile, filename, hash):
+        # quit if the largefile isn't there
+        stat = self._stat(hash)
+        if stat:
+            raise util.Abort(_('remotestore: largefile %s is %s') %
+                             (hash, stat == 1 and 'invalid' or 'missing'))
+
+        try:
+            length, infile = self._get(hash)
+        except HTTPError, e:
+            # 401s get converted to util.Aborts; everything else is fine being
+            # turned into a StoreError
+            raise basestore.StoreError(filename, hash, self.url, str(e))
+        except urllib2.URLError, e:
+            # This usually indicates a connection problem, so don't
+            # keep trying with the other files... they will probably
+            # all fail too.
+            raise util.Abort('%s: %s' % (self.url, str(e.reason)))
+        except IOError, e:
+            raise basestore.StoreError(filename, hash, self.url, str(e))
+
+        # Mercurial does not close its SSH connections after writing a stream
+        if length is not None:
+            infile = lfutil.limitreader(infile, length)
+        return lfutil.copyandhash(lfutil.blockstream(infile), tmpfile)
+
+    def _verify(self, hash):
+        return not self._stat(hash)
+
+    def _verifyfile(self, cctx, cset, contents, standin, verified):
+        filename = lfutil.splitstandin(standin)
+        if not filename:
+            return False
+        fctx = cctx[standin]
+        key = (filename, fctx.filenode())
+        if key in verified:
+            return False
+
+        verified.add(key)
+
+        stat = self._stat(hash)
+        if not stat:
+            return False
+        elif stat == 1:
+            self.ui.warn(
+                _('changeset %s: %s: contents differ\n')
+                % (cset, filename))
+            return True # failed
+        elif stat == 2:
+            self.ui.warn(
+                _('changeset %s: %s missing\n')
+                % (cset, filename))
+            return True # failed
+        else:
+            raise util.Abort(_('check failed, unexpected response'
+                               'statlfile: %d') % stat)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/reposetup.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,411 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''setup for largefiles repositories: reposetup'''
+import copy
+import types
+import os
+import re
+
+from mercurial import context, error, manifest, match as match_, \
+        node, util
+from mercurial.i18n import _
+
+import lfcommands
+import proto
+import lfutil
+
+def reposetup(ui, repo):
+    # wire repositories should be given new wireproto functions but not the
+    # other largefiles modifications
+    if not repo.local():
+        return proto.wirereposetup(ui, repo)
+
+    for name in ('status', 'commitctx', 'commit', 'push'):
+        method = getattr(repo, name)
+        #if not (isinstance(method, types.MethodType) and
+        #        method.im_func is repo.__class__.commitctx.im_func):
+        if isinstance(method, types.FunctionType) and method.func_name == \
+            'wrap':
+            ui.warn(_('largefiles: repo method %r appears to have already been'
+                    ' wrapped by another extension: '
+                    'largefiles may behave incorrectly\n')
+                    % name)
+
+    class lfiles_repo(repo.__class__):
+        lfstatus = False
+        def status_nolfiles(self, *args, **kwargs):
+            return super(lfiles_repo, self).status(*args, **kwargs)
+
+        # When lfstatus is set, return a context that gives the names of lfiles
+        # instead of their corresponding standins and identifies the lfiles as
+        # always binary, regardless of their actual contents.
+        def __getitem__(self, changeid):
+            ctx = super(lfiles_repo, self).__getitem__(changeid)
+            if self.lfstatus:
+                class lfiles_manifestdict(manifest.manifestdict):
+                    def __contains__(self, filename):
+                        if super(lfiles_manifestdict,
+                                self).__contains__(filename):
+                            return True
+                        return super(lfiles_manifestdict,
+                            self).__contains__(lfutil.shortname+'/' + filename)
+                class lfiles_ctx(ctx.__class__):
+                    def files(self):
+                        filenames = super(lfiles_ctx, self).files()
+                        return [re.sub('^\\'+lfutil.shortname+'/', '', filename) for filename
+                            in filenames]
+                    def manifest(self):
+                        man1 = super(lfiles_ctx, self).manifest()
+                        man1.__class__ = lfiles_manifestdict
+                        return man1
+                    def filectx(self, path, fileid=None, filelog=None):
+                        try:
+                            result = super(lfiles_ctx, self).filectx(path,
+                                fileid, filelog)
+                        except error.LookupError:
+                            # Adding a null character will cause Mercurial to
+                            # identify this as a binary file.
+                            result = super(lfiles_ctx, self).filectx(
+                                lfutil.shortname + '/' + path, fileid,
+                                filelog)
+                            olddata = result.data
+                            result.data = lambda: olddata() + '\0'
+                        return result
+                ctx.__class__ = lfiles_ctx
+            return ctx
+
+        # Figure out the status of big files and insert them into the
+        # appropriate list in the result. Also removes standin files from
+        # the listing. This function reverts to the original status if
+        # self.lfstatus is False
+        def status(self, node1='.', node2=None, match=None, ignored=False,
+                clean=False, unknown=False, listsubrepos=False):
+            listignored, listclean, listunknown = ignored, clean, unknown
+            if not self.lfstatus:
+                try:
+                    return super(lfiles_repo, self).status(node1, node2, match,
+                        listignored, listclean, listunknown, listsubrepos)
+                except TypeError:
+                    return super(lfiles_repo, self).status(node1, node2, match,
+                        listignored, listclean, listunknown)
+            else:
+                # some calls in this function rely on the old version of status
+                self.lfstatus = False
+                if isinstance(node1, context.changectx):
+                    ctx1 = node1
+                else:
+                    ctx1 = repo[node1]
+                if isinstance(node2, context.changectx):
+                    ctx2 = node2
+                else:
+                    ctx2 = repo[node2]
+                working = ctx2.rev() is None
+                parentworking = working and ctx1 == self['.']
+
+                def inctx(file, ctx):
+                    try:
+                        if ctx.rev() is None:
+                            return file in ctx.manifest()
+                        ctx[file]
+                        return True
+                    except:
+                        return False
+
+                # create a copy of match that matches standins instead of
+                # lfiles if matcher not set then it is the always matcher so
+                # overwrite that
+                if match is None:
+                    match = match_.always(self.root, self.getcwd())
+
+                def tostandin(file):
+                    if inctx(lfutil.standin(file), ctx2):
+                        return lfutil.standin(file)
+                    return file
+
+                m = copy.copy(match)
+                m._files = [tostandin(f) for f in m._files]
+
+                # get ignored clean and unknown but remove them later if they
+                # were not asked for
+                try:
+                    result = super(lfiles_repo, self).status(node1, node2, m,
+                        True, True, True, listsubrepos)
+                except TypeError:
+                    result = super(lfiles_repo, self).status(node1, node2, m,
+                        True, True, True)
+                if working:
+                    # Hold the wlock while we read lfiles and update the
+                    # lfdirstate
+                    wlock = repo.wlock()
+                    try:
+                        # Any non lfiles that were explicitly listed must be
+                        # taken out or lfdirstate.status will report an error.
+                        # The status of these files was already computed using
+                        # super's status.
+                        lfdirstate = lfutil.openlfdirstate(ui, self)
+                        match._files = [f for f in match._files if f in
+                            lfdirstate]
+                        s = lfdirstate.status(match, [], listignored,
+                                listclean, listunknown)
+                        (unsure, modified, added, removed, missing, unknown,
+                                ignored, clean) = s
+                        if parentworking:
+                            for lfile in unsure:
+                                if ctx1[lfutil.standin(lfile)].data().strip() \
+                                        != lfutil.hashfile(self.wjoin(lfile)):
+                                    modified.append(lfile)
+                                else:
+                                    clean.append(lfile)
+                                    lfdirstate.normal(lfile)
+                            lfdirstate.write()
+                        else:
+                            tocheck = unsure + modified + added + clean
+                            modified, added, clean = [], [], []
+
+                            for lfile in tocheck:
+                                standin = lfutil.standin(lfile)
+                                if inctx(standin, ctx1):
+                                    if ctx1[standin].data().strip() != \
+                                            lfutil.hashfile(self.wjoin(lfile)):
+                                        modified.append(lfile)
+                                    else:
+                                        clean.append(lfile)
+                                else:
+                                    added.append(lfile)
+                    finally:
+                        wlock.release()
+
+                    for standin in ctx1.manifest():
+                        if not lfutil.isstandin(standin):
+                            continue
+                        lfile = lfutil.splitstandin(standin)
+                        if not match(lfile):
+                            continue
+                        if lfile not in lfdirstate:
+                            removed.append(lfile)
+                    # Handle unknown and ignored differently
+                    lfiles = (modified, added, removed, missing, [], [], clean)
+                    result = list(result)
+                    # Unknown files
+                    result[4] = [f for f in unknown if repo.dirstate[f] == '?'\
+                        and not lfutil.isstandin(f)]
+                    # Ignored files must be ignored by both the dirstate and
+                    # lfdirstate
+                    result[5] = set(ignored).intersection(set(result[5]))
+                    # combine normal files and lfiles
+                    normals = [[fn for fn in filelist if not \
+                        lfutil.isstandin(fn)] for filelist in result]
+                    result = [sorted(list1 + list2) for (list1, list2) in \
+                        zip(normals, lfiles)]
+                else:
+                    def toname(f):
+                        if lfutil.isstandin(f):
+                            return lfutil.splitstandin(f)
+                        return f
+                    result = [[toname(f) for f in items] for items in result]
+
+                if not listunknown:
+                    result[4] = []
+                if not listignored:
+                    result[5] = []
+                if not listclean:
+                    result[6] = []
+                self.lfstatus = True
+                return result
+
+        # This call happens after a commit has occurred. Copy all of the lfiles
+        # into the cache
+        def commitctx(self, *args, **kwargs):
+            node = super(lfiles_repo, self).commitctx(*args, **kwargs)
+            ctx = self[node]
+            for filename in ctx.files():
+                if lfutil.isstandin(filename) and filename in ctx.manifest():
+                    realfile = lfutil.splitstandin(filename)
+                    lfutil.copytocache(self, ctx.node(), realfile)
+
+            return node
+
+        # This call happens before a commit has occurred. The lfile standins
+        # have not had their contents updated (to reflect the hash of their
+        # lfile).  Do that here.
+        def commit(self, text="", user=None, date=None, match=None,
+                force=False, editor=False, extra={}):
+            orig = super(lfiles_repo, self).commit
+
+            wlock = repo.wlock()
+            try:
+                if getattr(repo, "_isrebasing", False):
+                    # We have to take the time to pull down the new lfiles now.
+                    # Otherwise if we are rebasing, any lfiles that were
+                    # modified in the changesets we are rebasing on top of get
+                    # overwritten either by the rebase or in the first commit
+                    # after the rebase.
+                    lfcommands.updatelfiles(repo.ui, repo)
+                # Case 1: user calls commit with no specific files or
+                # include/exclude patterns: refresh and commit everything.
+                if (match is None) or (not match.anypats() and not \
+                        match.files()):
+                    lfiles = lfutil.listlfiles(self)
+                    lfdirstate = lfutil.openlfdirstate(ui, self)
+                    # this only loops through lfiles that exist (not
+                    # removed/renamed)
+                    for lfile in lfiles:
+                        if os.path.exists(self.wjoin(lfutil.standin(lfile))):
+                            # this handles the case where a rebase is being
+                            # performed and the working copy is not updated
+                            # yet.
+                            if os.path.exists(self.wjoin(lfile)):
+                                lfutil.updatestandin(self,
+                                    lfutil.standin(lfile))
+                                lfdirstate.normal(lfile)
+                    for lfile in lfdirstate:
+                        if not os.path.exists(
+                                repo.wjoin(lfutil.standin(lfile))):
+                            try:
+                                # Mercurial >= 1.9
+                                lfdirstate.drop(lfile)
+                            except AttributeError:
+                                # Mercurial <= 1.8
+                                lfdirstate.forget(lfile)
+                    lfdirstate.write()
+
+                    return orig(text=text, user=user, date=date, match=match,
+                                    force=force, editor=editor, extra=extra)
+
+                for file in match.files():
+                    if lfutil.isstandin(file):
+                        raise util.Abort(
+                            "Don't commit largefile standin. Commit largefile.")
+
+                # Case 2: user calls commit with specified patterns: refresh
+                # any matching big files.
+                smatcher = lfutil.composestandinmatcher(self, match)
+                standins = lfutil.dirstate_walk(self.dirstate, smatcher)
+
+                # No matching big files: get out of the way and pass control to
+                # the usual commit() method.
+                if not standins:
+                    return orig(text=text, user=user, date=date, match=match,
+                                    force=force, editor=editor, extra=extra)
+
+                # Refresh all matching big files.  It's possible that the
+                # commit will end up failing, in which case the big files will
+                # stay refreshed.  No harm done: the user modified them and
+                # asked to commit them, so sooner or later we're going to
+                # refresh the standins.  Might as well leave them refreshed.
+                lfdirstate = lfutil.openlfdirstate(ui, self)
+                for standin in standins:
+                    lfile = lfutil.splitstandin(standin)
+                    if lfdirstate[lfile] <> 'r':
+                        lfutil.updatestandin(self, standin)
+                        lfdirstate.normal(lfile)
+                    else:
+                        try:
+                            # Mercurial >= 1.9
+                            lfdirstate.drop(lfile)
+                        except AttributeError:
+                            # Mercurial <= 1.8
+                            lfdirstate.forget(lfile)
+                lfdirstate.write()
+
+                # Cook up a new matcher that only matches regular files or
+                # standins corresponding to the big files requested by the
+                # user.  Have to modify _files to prevent commit() from
+                # complaining "not tracked" for big files.
+                lfiles = lfutil.listlfiles(repo)
+                match = copy.copy(match)
+                orig_matchfn = match.matchfn
+
+                # Check both the list of lfiles and the list of standins
+                # because if a lfile was removed, it won't be in the list of
+                # lfiles at this point
+                match._files += sorted(standins)
+
+                actualfiles = []
+                for f in match._files:
+                    fstandin = lfutil.standin(f)
+
+                    # Ignore known lfiles and standins
+                    if f in lfiles or fstandin in standins:
+                        continue
+
+                    # Append directory separator to avoid collisions
+                    if not fstandin.endswith(os.sep):
+                        fstandin += os.sep
+
+                    # Prevalidate matching standin directories
+                    if lfutil.any_(st for st in match._files if \
+                            st.startswith(fstandin)):
+                        continue
+                    actualfiles.append(f)
+                match._files = actualfiles
+
+                def matchfn(f):
+                    if orig_matchfn(f):
+                        return f not in lfiles
+                    else:
+                        return f in standins
+
+                match.matchfn = matchfn
+                return orig(text=text, user=user, date=date, match=match,
+                                force=force, editor=editor, extra=extra)
+            finally:
+                wlock.release()
+
+        def push(self, remote, force=False, revs=None, newbranch=False):
+            o = lfutil.findoutgoing(repo, remote, force)
+            if o:
+                toupload = set()
+                o = repo.changelog.nodesbetween(o, revs)[0]
+                for n in o:
+                    parents = [p for p in repo.changelog.parents(n) if p != \
+                        node.nullid]
+                    ctx = repo[n]
+                    files = set(ctx.files())
+                    if len(parents) == 2:
+                        mc = ctx.manifest()
+                        mp1 = ctx.parents()[0].manifest()
+                        mp2 = ctx.parents()[1].manifest()
+                        for f in mp1:
+                            if f not in mc:
+                                files.add(f)
+                        for f in mp2:
+                            if f not in mc:
+                                files.add(f)
+                        for f in mc:
+                            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f,
+                                    None):
+                                files.add(f)
+
+                    toupload = toupload.union(set([ctx[f].data().strip() for f\
+                        in files if lfutil.isstandin(f) and f in ctx]))
+                lfcommands.uploadlfiles(ui, self, remote, toupload)
+            # Mercurial >= 1.6 takes the newbranch argument, try that first.
+            try:
+                return super(lfiles_repo, self).push(remote, force, revs,
+                    newbranch)
+            except TypeError:
+                return super(lfiles_repo, self).push(remote, force, revs)
+
+    repo.__class__ = lfiles_repo
+
+    def checkrequireslfiles(ui, repo, **kwargs):
+        if 'largefiles' not in repo.requirements and lfutil.any_(
+                lfutil.shortname+'/' in f[0] for f in repo.store.datafiles()):
+            # work around bug in mercurial 1.9 whereby requirements is a list
+            # on newly-cloned repos
+            repo.requirements = set(repo.requirements)
+
+            repo.requirements |= set(['largefiles'])
+            repo._writerequirements()
+
+    checkrequireslfiles(ui, repo)
+
+    ui.setconfig('hooks', 'changegroup.lfiles', checkrequireslfiles)
+    ui.setconfig('hooks', 'commit.lfiles', checkrequireslfiles)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/uisetup.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,125 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''setup for largefiles extension: uisetup'''
+
+from mercurial import archival, cmdutil, commands, extensions, filemerge, hg, \
+        httprepo, localrepo, sshrepo, sshserver, wireproto
+from mercurial.i18n import _
+from mercurial.hgweb import hgweb_mod, protocol
+
+import overrides
+import proto
+
+def uisetup(ui):
+    '''Install largefiles command wrappers, function wrappers and wire
+    protocol commands at extension load time.'''
+    # Disable auto-status for some commands which assume that all
+    # files in the result are under Mercurial's control
+
+    entry = extensions.wrapcommand(commands.table, 'add', overrides.override_add)
+    addopt = [('', 'large', None, _('add as largefile')),
+            ('', 'lfsize', '', _('add all files above this size (in megabytes)'
+                                 'as largefiles (default: 10)'))]
+    entry[1].extend(addopt)
+
+    entry = extensions.wrapcommand(commands.table, 'addremove',
+            overrides.override_addremove)
+    entry = extensions.wrapcommand(commands.table, 'remove', overrides.override_remove)
+    entry = extensions.wrapcommand(commands.table, 'forget', overrides.override_forget)
+    entry = extensions.wrapcommand(commands.table, 'status', overrides.override_status)
+    entry = extensions.wrapcommand(commands.table, 'log', overrides.override_log)
+    entry = extensions.wrapcommand(commands.table, 'rollback',
+            overrides.override_rollback)
+
+    # 'hg verify' grows options to also check largefile existence/contents
+    entry = extensions.wrapcommand(commands.table, 'verify', overrides.override_verify)
+    verifyopt = [('', 'large', None, _('verify largefiles')),
+                 ('', 'lfa', None,
+                     _('verify all revisions of largefiles not just current')),
+                 ('', 'lfc', None,
+                     _('verify largefile contents not just existence'))]
+    entry[1].extend(verifyopt)
+
+    entry = extensions.wrapcommand(commands.table, 'outgoing',
+        overrides.override_outgoing)
+    outgoingopt = [('', 'large', None, _('display outgoing largefiles'))]
+    entry[1].extend(outgoingopt)
+    entry = extensions.wrapcommand(commands.table, 'summary', overrides.override_summary)
+    summaryopt = [('', 'large', None, _('display outgoing largefiles'))]
+    entry[1].extend(summaryopt)
+
+    entry = extensions.wrapcommand(commands.table, 'update', overrides.override_update)
+    entry = extensions.wrapcommand(commands.table, 'pull', overrides.override_pull)
+    entry = extensions.wrapfunction(filemerge, 'filemerge', overrides.override_filemerge)
+    entry = extensions.wrapfunction(cmdutil, 'copy', overrides.override_copy)
+
+    # Backout calls revert so we need to override both the command and the
+    # function
+    entry = extensions.wrapcommand(commands.table, 'revert', overrides.override_revert)
+    entry = extensions.wrapfunction(commands, 'revert', overrides.override_revert)
+
+    # clone uses hg._update instead of hg.update even though they are the
+    # same function... so wrap both of them)
+    extensions.wrapfunction(hg, 'update', overrides.hg_update)
+    extensions.wrapfunction(hg, '_update', overrides.hg_update)
+    extensions.wrapfunction(hg, 'clean', overrides.hg_clean)
+    extensions.wrapfunction(hg, 'merge', overrides.hg_merge)
+
+    extensions.wrapfunction(archival, 'archive', overrides.override_archive)
+    # bail_if_changed was renamed to bailifchanged in newer Mercurial;
+    # wrap whichever name this version provides
+    if hasattr(cmdutil, 'bailifchanged'):
+        extensions.wrapfunction(cmdutil, 'bailifchanged',
+            overrides.override_bailifchanged)
+    else:
+        extensions.wrapfunction(cmdutil, 'bail_if_changed',
+            overrides.override_bailifchanged)
+
+    # create the new wireproto commands ...
+    wireproto.commands['putlfile'] = (proto.putlfile, 'sha')
+    wireproto.commands['getlfile'] = (proto.getlfile, 'sha')
+    wireproto.commands['statlfile'] = (proto.statlfile, 'sha')
+
+    # ... and wrap some existing ones
+    wireproto.commands['capabilities'] = (proto.capabilities, '')
+    wireproto.commands['heads'] = (proto.heads, '')
+    wireproto.commands['lheads'] = (wireproto.heads, '')
+
+    # make putlfile behave the same as push and {get,stat}lfile behave the same
+    # as pull w.r.t. permissions checks
+    hgweb_mod.perms['putlfile'] = 'push'
+    hgweb_mod.perms['getlfile'] = 'pull'
+    hgweb_mod.perms['statlfile'] = 'pull'
+
+    # the hello wireproto command uses wireproto.capabilities, so it won't see
+    # our largefiles capability unless we replace the actual function as well.
+    proto.capabilities_orig = wireproto.capabilities
+    wireproto.capabilities = proto.capabilities
+
+    # these let us reject non-lfiles clients and make them display our error
+    # messages
+    protocol.webproto.refuseclient = proto.webproto_refuseclient
+    sshserver.sshserver.refuseclient = proto.sshproto_refuseclient
+
+    # can't do this in reposetup because it needs to have happened before
+    # wirerepo.__init__ is called
+    proto.ssh_oldcallstream = sshrepo.sshrepository._callstream
+    proto.http_oldcallstream = httprepo.httprepository._callstream
+    sshrepo.sshrepository._callstream = proto.sshrepo_callstream
+    httprepo.httprepository._callstream = proto.httprepo_callstream
+
+    # don't die on seeing a repo with the largefiles requirement
+    localrepo.localrepository.supported |= set(['largefiles'])
+
+    # override some extensions' stuff as well
+    for name, module in extensions.extensions():
+        if name == 'fetch':
+            extensions.wrapcommand(getattr(module, 'cmdtable'), 'fetch',
+                overrides.override_fetch)
+        if name == 'purge':
+            extensions.wrapcommand(getattr(module, 'cmdtable'), 'purge',
+                overrides.override_purge)
+        if name == 'rebase':
+            extensions.wrapcommand(getattr(module, 'cmdtable'), 'rebase',
+                overrides.override_rebase)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/usage.txt	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,51 @@
+Largefiles allows for tracking large, incompressible binary files in Mercurial
+without requiring excessive bandwidth for clones and pulls.  Files added as
+largefiles are not tracked directly by Mercurial; rather, their revisions are
+identified by a checksum, and Mercurial tracks these checksums.  This way, when
+you clone a repository or pull in changesets, the large files in older
+revisions of the repository are not needed, and only the ones needed to update
+to the current version are downloaded.  This saves both disk space and
+bandwidth.
+
+If you are starting a new repository or adding new large binary files, using
+largefiles for them is as easy as adding '--large' to your hg add command.  For
+example:
+
+$ dd if=/dev/urandom of=thisfileislarge count=2000
+$ hg add --large thisfileislarge
+$ hg commit -m 'add thisfileislarge, which is large, as a largefile'
+
+When you push a changeset that affects largefiles to a remote repository, its
+largefile revisions will be uploaded along with it.  Note that the remote
+Mercurial must also have the largefiles extension enabled for this to work.
+
+When you pull a changeset that affects largefiles from a remote repository,
+nothing different from Mercurial's normal behavior happens.  However, when you
+update to such a revision, any largefiles needed by that revision are
+downloaded and cached if they have never been downloaded before.  This means
+that network access is required to update to a revision you have not yet updated
+to.
+
+If you already have large files tracked by Mercurial without the largefiles
+extension, you will need to convert your repository in order to benefit from
+largefiles.  This is done with the 'hg lfconvert' command:
+
+$ hg lfconvert --size 10 oldrepo newrepo
+
+By default, in repositories that already have largefiles in them, any new file
+over 10MB will automatically be added as a largefile.  To change this
+threshold, set [largefiles].size in your Mercurial config file to the minimum
+size in megabytes to track as a largefile, or use the --lfsize option to the
+add command (also in megabytes):
+
+[largefiles]
+size = 2
+
+$ hg add --lfsize 2
+
+The [largefiles].patterns config option allows you to specify specific
+space-separated filename patterns (in shell glob syntax) that should always be
+tracked as largefiles:
+
+[largefiles]
+patterns = *.jpg *.{png,bmp} library.zip content/audio/*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/wirestore.py	Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,29 @@
+# Copyright 2010-2011 Fog Creek Software
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''largefile store working over mercurial's wire protocol'''
+
+import lfutil
+import remotestore
+
+class wirestore(remotestore.remotestore):
+    def __init__(self, ui, repo, remote):
+        cap = remote.capable('largefiles')
+        if not cap:
+            raise lfutil.storeprotonotcapable([])
+        storetypes = cap.split(',')
+        if not 'serve' in storetypes:
+            raise lfutil.storeprotonotcapable(storetypes)
+        self.remote = remote
+        super(wirestore, self).__init__(ui, repo, remote.url())
+
+    def _put(self, hash, fd):
+        return self.remote.putlfile(hash, fd)
+
+    def _get(self, hash):
+        return self.remote.getlfile(hash)
+
+    def _stat(self, hash):
+        return self.remote.statlfile(hash)