Patchwork [5,of,6,RFC] extension: hg-lfs

login
register
mail settings
Submitter Remi Chaintron
Date Oct. 27, 2016, 3:04 p.m.
Message ID <202b2a6872d3b3ffb1f3.1477580643@remi-mbp2.dhcp.thefacebook.com>
Download mbox | patch
Permalink /patch/17207/
State Deferred
Headers show

Comments

Remi Chaintron - Oct. 27, 2016, 3:04 p.m.
# HG changeset patch
# User Remi Chaintron <remi@fb.com>
# Date 1477579974 -3600
#      Thu Oct 27 15:52:54 2016 +0100
# Branch stable
# Node ID 202b2a6872d3b3ffb1f373910c4eadffda8f727a
# Parent  fb19b40dd647dad449e861dc4865e5c584c83e0e
[RFC] extension: hg-lfs

Implementation of the `lfs` extension based on the flagprocessor design
discussed with Pierre-Yves at the Mercurial 4.0 sprint.
This is an example of relying on the flagprocessor to wrap high-level
filelog methods such as `addrevision()`, `revision()` and `addgroup()` in
order to flag revisions and modify the contents of filelogs.

Patch

diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/lfs/__init__.py
@@ -0,0 +1,61 @@ 
+# coding=UTF-8
+
+from __future__ import absolute_import
+
+from mercurial import (
+    cmdutil,
+    commands,
+    extensions,
+    filelog,
+)
+
+from . import (
+    blobstore,
+    wrapper,
+)
+
+def reposetup(ui, repo):
+    """Attach the lfs threshold, blobstores and push hook to a local repo.
+
+    Values are read from the ``[lfs]`` configuration section. The blobstores,
+    and the threshold when one is configured, are stored on both ``repo`` and
+    ``repo.svfs``.
+    """
+    # reposetup is invoked for every repository object, including remote
+    # peers, which have no store vfs and no push hooks to configure.
+    if not repo.local():
+        return
+
+    # Threshold for a file to be considered a large binary blob
+    lfsthreshold = ui.configbytes('lfs', 'threshold', None)
+    if lfsthreshold:
+        repo.svfs.options['lfsthreshold'] = lfsthreshold
+        repo.lfsthreshold = lfsthreshold
+
+    # Path for the local blobstore configuration
+    storepath = ui.config('lfs', 'blobstore', 'cache/blobstore')
+    repo.blobstore = blobstore.local(storepath, repo.opener)
+    repo.svfs.blobstore = repo.blobstore
+
+    # Remote blobstore configuration
+    remotestorepath = ui.config('lfs', 'remoteblobstore', 'dummy')
+    remoteuser = ui.config('lfs', 'remoteuser', None)
+    remotepassword = ui.config('lfs', 'remotepassword', None)
+    if remotestorepath == 'dummy':
+        # Demo only: we're not actually uploading the blobs, but storing them in
+        # tmp instead
+        repo.remoteblobstore = blobstore.dummy()
+    else:
+        # FIXME: configure remote blobstore user/password + chunksize etc
+        repo.remoteblobstore = blobstore.remote(url=remotestorepath,
+                                                user=remoteuser,
+                                                password=remotepassword)
+    repo.svfs.remoteblobstore = repo.remoteblobstore
+
+    # Push hook
+    repo.prepushoutgoinghooks.add('lfs', wrapper.prepush)
+
+def extsetup(ui):
+    """Install the lfs wrappers around filelog methods and commands."""
+    # filelog methods; each wrapper in ``wrapper`` carries the wrapped name
+    for name in ('revision', 'addrevision', 'addgroup', '_peek_islargefile'):
+        extensions.wrapfunction(filelog.filelog, name, getattr(wrapper, name))
+
+    # Wrap commands that do not follow the default behaviour when treating large
+    # blobs.
+    for name in ('debugdata', 'push', 'pull'):
+        extensions.wrapcommand(commands.table, name, getattr(wrapper, name))
diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
new file mode 100644
--- /dev/null
+++ b/hgext/lfs/blobstore.py
@@ -0,0 +1,224 @@ 
+from __future__ import absolute_import
+
+import errno
+import json
+import os
+import re
+import tempfile
+
+from mercurial import (
+    i18n,
+    revlog,
+    util,
+)
+
+from . import pointer
+
+class local(object):
+    """Local blobstore for large file contents.
+
+    This blobstore is used both as a cache and as a staging area for large blobs
+    to be uploaded to the remote blobstore.
+    """
+
+    # Blob identifiers are lowercase hex digests. The whole identifier is
+    # checked, not just its first 40 characters, because it is turned into a
+    # path below the store directory.
+    _oidre = re.compile(r'[a-f0-9]{40,}\Z')
+
+    def __init__(self, path, opener):
+        """Create a store rooted at ``path``, accessed through ``opener``."""
+        self._opener = opener
+        self._storepath = path
+
+    @staticmethod
+    def get(opener):
+        """Get the stored local blobstore instance.
+
+        Raises UnknownBlobstoreError when ``opener`` has no ``blobstore``
+        attribute.
+        """
+        if util.safehasattr(opener, 'blobstore'):
+            return opener.blobstore
+        raise UnknownBlobstoreError()
+
+    def write(self, node, data):
+        """Write blob to local blobstore."""
+        assert self._oidre.match(node)
+        fp = self._opener(self.filename(node), 'w+', atomictemp=True)
+        try:
+            fp.write(data)
+        finally:
+            fp.close()
+
+    def read(self, node):
+        """Read blob from local blobstore."""
+        assert self._oidre.match(node)
+        fp = self._opener(self.filename(node), 'r')
+        try:
+            return fp.read()
+        finally:
+            fp.close()
+
+    def has(self, node):
+        """Returns True if the local blobstore contains the requested blob,
+        False otherwise."""
+        return self._opener.exists(self.filename(node))
+
+    def filename(self, node):
+        """Generates filename for a blob in the local blob store. Defaults to
+        .hg/cache/blobstore/XX/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"""
+        return os.path.join(self._storepath, node[0:2], node[2:])
+
+class remote(object):
+    """Remote blobstore reached through the git-lfs batch API."""
+
+    def __init__(self, url=None, user=None, password=None):
+        """Remember the base ``url``; when both ``user`` and ``password`` are
+        given, install an opener performing HTTP basic authentication."""
+        assert url is not None
+        self.baseurl = url
+        if user is not None and password is not None:
+            urlreq = util.urlreq
+            passwdmanager = urlreq.httppasswordmgrwithdefaultrealm()
+            passwdmanager.add_password(None, url, user, password)
+            authenticator = urlreq.httpbasicauthhandler(passwdmanager)
+            opener = urlreq.buildopener(authenticator)
+            urlreq.installopener(opener)
+
+    @staticmethod
+    def get(opener):
+        """Get the stored remote blobstore instance.
+
+        Raises UnknownBlobstoreError when ``opener`` has no
+        ``remoteblobstore`` attribute.
+        """
+        if util.safehasattr(opener, 'remoteblobstore'):
+            return opener.remoteblobstore
+        raise UnknownBlobstoreError()
+
+    def writebatch(self, data, fromstore):
+        """Batch upload from local to remote blobstore."""
+        self._batch(data, fromstore, 'upload')
+
+    def readbatch(self, data, tostore):
+        """Batch download from remote to local blobstore."""
+        self._batch(data, tostore, 'download')
+
+    def _batch(self, data, localstore, action):
+        """Run ``action`` ('upload' or 'download') for every pointer in
+        ``data``, reading blobs from or writing them to ``localstore``.
+
+        Raises UnavailableBatchOperationError for any other ``action`` and
+        RequestFailedError when a transfer fails or a download is not offered
+        by the server.
+        """
+        if action not in ['upload', 'download']:
+            # FIXME: we should not have that error raise too high
+            raise UnavailableBatchOperationError(None, action)
+
+        # Create the batch data for git-lfs.
+        urlreq = util.urlreq
+        objects = []
+        for metadata in data:
+            objects.append({
+                'oid': pointer.getoidfrom(metadata),
+                'size': metadata['size'],
+            })
+
+        # Nothing to transfer: do not bother the server with an empty batch.
+        if not objects:
+            return
+
+        payload = json.dumps({
+            'objects': objects,
+            'operation': action,
+        })
+
+        # Ask git-lfs which transfers have to be performed for the batch.
+        batchreq = urlreq.request(self.baseurl + 'objects/batch', data=payload)
+        batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
+        batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
+        batchresponse = json.loads(urlreq.urlopen(batchreq).read())
+
+        for obj in batchresponse.get('objects') or []:
+            oid = str(obj['oid'])
+            try:
+                # The action we're trying to perform should be available for the
+                # current blob. The "actions" member may be missing altogether
+                # (e.g. the blob is already known by the remote), which must
+                # take the same path as a missing action.
+                actions = obj.get('actions') or {}
+                if action not in actions:
+                    raise UnavailableBatchOperationError(oid, action)
+
+                href = actions[action].get('href')
+                headers = actions[action].get('header', {}).items()
+
+                if action == 'upload':
+                    # If uploading blobs, read data from local blobstore.
+                    blob = localstore.read(oid)
+                    request = urlreq.request(href, data=blob)
+                    request.get_method = lambda: 'PUT'
+                else:
+                    request = urlreq.request(href)
+
+                for k, v in headers:
+                    request.add_header(k, v)
+
+                blobresponse = urlreq.urlopen(request)
+
+                if action == 'download':
+                    # If downloading blobs, store downloaded data to local
+                    # blobstore
+                    localstore.write(oid, blobresponse.read())
+            except util.urlerr.httperror:
+                raise RequestFailedError(oid, action)
+            except UnavailableBatchOperationError:
+                if action == 'upload':
+                    # The blob is already known by the remote blobstore.
+                    continue
+                else:
+                    raise RequestFailedError(oid, action)
+
+class dummy(object):
+    """Dummy store storing blobs to temp directory."""
+
+    def __init__(self, path='/tmp/hgstore'):
+        """Create the store directory ``path`` if it does not exist yet."""
+        self._ensuredir(path)
+        self._storepath = path
+
+    @staticmethod
+    def _ensuredir(path):
+        """Create ``path`` and its parents, tolerating an existing one."""
+        try:
+            os.makedirs(path)
+        except OSError as exc:
+            if exc.errno != errno.EEXIST:
+                raise
+
+    @staticmethod
+    def get(opener):
+        """Get the stored remote blobstore instance.
+
+        Raises UnknownBlobstoreError when ``opener`` has no
+        ``remoteblobstore`` attribute.
+        """
+        if util.safehasattr(opener, 'remoteblobstore'):
+            return opener.remoteblobstore
+        raise UnknownBlobstoreError()
+
+    def write(self, node, data):
+        """Store ``data`` as the blob named ``node``."""
+        fname = self.filename(node)
+        self._ensuredir(os.path.dirname(fname))
+        # Blobs are arbitrary binary content: never go through text mode.
+        with open(fname, 'wb') as fp:
+            fp.write(data)
+
+    def read(self, node):
+        """Return the content of the blob named ``node``."""
+        with open(self.filename(node), 'rb') as fp:
+            return fp.read()
+
+    def writebatch(self, data, fromstore):
+        """Copy every blob referenced by the pointers in ``data`` from
+        ``fromstore`` into this store."""
+        for metadata in data:
+            oid = pointer.getoidfrom(metadata)
+            content = fromstore.read(oid)
+            self.write(oid, content)
+
+    def readbatch(self, data, tostore):
+        """Copy every blob referenced by the pointers in ``data`` from this
+        store into ``tostore``."""
+        for metadata in data:
+            oid = pointer.getoidfrom(metadata)
+            content = self.read(oid)
+            tostore.write(oid, content)
+
+    def filename(self, node):
+        """Return the path of the blob named ``node`` in the store."""
+        return os.path.join(self._storepath, node)
+
+class UnknownBlobstoreError(revlog.RevlogError):
+    """Raised when the requested blobstore cannot be found."""
+    def __init__(self):
+        revlog.RevlogError.__init__(
+            self, i18n._('attempt to access unknown blobstore'))
+
+class RequestFailedError(revlog.RevlogError):
+    """Raised when the blob ``oid`` could not be transferred.
+
+    ``action`` is the attempted operation ('upload' or 'download').
+    """
+    def __init__(self, oid, action):
+        # Translate the template first and interpolate afterwards: an already
+        # formatted message cannot match a translation catalog entry.
+        message = i18n._('the requested file could not be %sed: %s')
+        revlog.RevlogError.__init__(self, message % (action, oid))
+
+class UnavailableBatchOperationError(revlog.RevlogError):
+    """Raised when the batch operation ``action`` is not available.
+
+    ``oid`` names the blob concerned, or is None when the operation itself is
+    unknown. Both values are kept as attributes.
+    """
+    def __init__(self, oid, action):
+        self.oid = oid
+        self.action = action
+
+        # Translate the templates, then interpolate: an already formatted
+        # message cannot match a translation catalog entry.
+        if self.oid:
+            template = i18n._('unknown batch operation "%s" for blob "%s"')
+            message = template % (self.action, self.oid)
+        else:
+            message = i18n._('unknown batch operation "%s"') % self.action
+        revlog.RevlogError.__init__(self, message)
+
diff --git a/hgext/lfs/pointer.py b/hgext/lfs/pointer.py
new file mode 100644
--- /dev/null
+++ b/hgext/lfs/pointer.py
@@ -0,0 +1,32 @@ 
+# coding=UTF-8
+from __future__ import absolute_import
+
+import re
+
+POINTER_VERSION = 'version https://git-lfs.github.com/spec/v1\n'
+
+def serialize(data, pointer=POINTER_VERSION):
+    """Serialize the ``data`` mapping into git-lfs pointer text.
+
+    One ``key value`` line per entry, sorted by key, is appended to
+    ``pointer``. Keys may only contain lowercase letters, digits, ``-`` and
+    ``.``; anything else trips the assertion.
+    """
+    # Anchor the pattern at the end: a bare match() only checks how the key
+    # starts, which lets keys containing spaces or newlines through and
+    # produces a pointer that cannot be parsed back.
+    matcher = re.compile(r'[a-z0-9\-.]+\Z')
+    lines = [pointer]
+    for key, value in sorted(data.items()):
+        assert matcher.match(key)
+        lines.append('%s %s\n' % (key, value))
+    return ''.join(lines)
+
+def deserialize(text):
+    """Parse git-lfs pointer ``text`` into a metadata dictionary.
+
+    Empty lines and the ``version`` line are skipped; every other line is
+    split on its first space into a key and a value. The result must hold
+    non-empty ``oid`` and ``size`` entries.
+    """
+    metadata = {}
+    for entry in text.splitlines():
+        if not entry:
+            continue
+        name, content = entry.split(' ', 1)
+        if name != 'version':
+            metadata[name] = content
+    assert metadata.get('oid')
+    assert metadata.get('size')
+    return metadata
+
+def getoidfrom(metadata):
+    """Return the ``oid`` entry of ``metadata`` without its ``<key>:`` prefix."""
+    assert 'oid' in metadata
+    # Only the first colon separates the prefix from the identifier.
+    _prefix, digest = str(metadata['oid']).split(':', 1)
+    return digest
diff --git a/hgext/lfs/util.py b/hgext/lfs/util.py
new file mode 100644
--- /dev/null
+++ b/hgext/lfs/util.py
@@ -0,0 +1,20 @@ 
+# coding=UTF-8
+
+from __future__ import absolute_import
+
+import hashlib
+
+from mercurial import (
+    revlog,
+    util,
+)
+
+safehasattr = util.safehasattr
+
+def sha256(text):
+    """Return the hexadecimal sha256 digest of ``text``."""
+    return hashlib.sha256(text).hexdigest()
+
+def hash(text, p1, p2):
+    """Return ``revlog.hash(text, p1, p2)``; thin delegate to the revlog hash."""
+    return revlog.hash(text, p1, p2)
diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py
new file mode 100644
--- /dev/null
+++ b/hgext/lfs/wrapper.py
@@ -0,0 +1,201 @@ 
+# coding=UTF-8
+
+from __future__ import absolute_import
+
+from mercurial import (
+    node,
+    revlog,
+)
+
+from . import (
+    blobstore,
+    pointer,
+    util,
+)
+
+def returnmetadata(self, text):
+    """Flagprocessor transform handing back the stored text untouched.
+
+    Used by the commands that need the raw filelog contents (push, pull and
+    debugdata). The result is a 2-tuple (text, validatehash); validatehash is
+    always False since the metadata does not match the original hash.
+    """
+    validatehash = False
+    return text, validatehash
+
+def readfromstore(self, text):
+    """Flagprocessor transform reading the blob from the local blobstore.
+
+    ``text`` is deserialized as a pointer; when the local blobstore does not
+    hold the blob it names, the blob is first fetched through the remote
+    blobstore. Returns a 2-tuple (text, validatehash) where validatehash is
+    True, as the contents of the blobstore should be checked using checkhash.
+    """
+    metadata = pointer.deserialize(text)
+    oid = pointer.getoidfrom(metadata)
+    localstore = blobstore.local.get(self.opener)
+    if not localstore.has(oid):
+        remotestore = blobstore.remote.get(self.opener)
+        remotestore.readbatch([metadata], localstore)
+    return localstore.read(oid), True
+
+# Transforms registered in the flagprocessor by the filelog wrappers, keyed by
+# revlog index flag. The pull, push and debugdata command wrappers swap the
+# REVIDX_ISLARGEFILE entry for returnmetadata while the command runs.
+transformmap = {
+    revlog.REVIDX_ISLARGEFILE: readfromstore,
+}
+
+def revision(orig, self, nodeorrev, _df=None):
+    """filelog.revision wrapper.
+
+    Registers the transformmap in the flag processor for the duration of the
+    wrapped call and unregisters it afterwards, whatever the outcome.
+    """
+    processor = self.flagprocessor
+    processor.register(transformmap)
+    try:
+        result = orig(self, nodeorrev, _df=_df)
+    finally:
+        processor.unregister(transformmap)
+    return result
+
+def addrevision(orig, self, text, transaction, link, p1, p2, cachedelta=None,
+                node=None, flags=revlog.REVLOG_DEFAULT_FLAGS):
+    """filelog.addrevision wrapper.
+
+    Blobs that ``_peek_islargefile`` reports as large are written to the local
+    blobstore under their sha256 digest and the revision is flagged with
+    REVIDX_ISLARGEFILE; a git-lfs pointer is then handed to the wrapped method
+    in place of the blob. Other revisions are passed through untouched.
+    """
+    # NOTE(review): the default for ``flags`` is REVLOG_DEFAULT_FLAGS while the
+    # per-revision index flags in revlog.py are named REVIDX_*; confirm that
+    # REVIDX_DEFAULT_FLAGS is not what was meant.
+    processor = self.flagprocessor
+    processor.register(transformmap)
+    try:
+        blobsize = len(text)
+        if self._peek_islargefile(blobsize):
+            # git-lfs identifies blobs by their sha256
+            sha = util.sha256(text)
+            # the actual contents go to the local blobstore
+            blobstore.local.get(self.opener).write(sha, text)
+            # a nodeid that has not been computed yet is derived from the
+            # original contents, not from the pointer
+            if node is None:
+                node = util.hash(text, p1, p2)
+            flags |= revlog.REVIDX_ISLARGEFILE
+            # the filelog only receives the pointer
+            metadata = {'oid': 'sha256:%s' % sha, 'size': blobsize}
+            text = pointer.serialize(metadata)
+
+        return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
+                    node=node, flags=flags)
+    finally:
+        processor.unregister(transformmap)
+
+def addgroup(orig, self, cg, linkmapper, transaction, addrevisioncb=None):
+    """filelog.addgroup wrapper.
+
+    Keeps the transformmap registered in the flag processor while the wrapped
+    method runs. This is necessary for push operations to successfully send
+    filelogs containing metadata by bypassing checkhash.
+    """
+    processor = self.flagprocessor
+    processor.register(transformmap)
+    try:
+        result = orig(self, cg, linkmapper, transaction,
+                      addrevisioncb=addrevisioncb)
+    finally:
+        processor.unregister(transformmap)
+    return result
+
+def _peek_islargefile(orig, self, length):
+    """filelog._peek_islargefile wrapper.
+
+    Returns True when the opener options carry an ``lfsthreshold`` and
+    ``length`` is strictly larger than it, False otherwise. The wrapped
+    method is not called.
+    """
+    options = getattr(self.opener, 'options', None)
+    if not options:
+        return False
+
+    threshold = options.get('lfsthreshold')
+    return bool(threshold and length > threshold)
+
+def pull(orig, ui, repo, source="default", **opts):
+    """Pull command wrapper.
+
+    This wrapper overrides the flagprocessor transform for
+    revlog.REVIDX_ISLARGEFILE to return the actual filelog and bypass checkhash
+    as it contains metadata in the case of largefiles.
+
+    This allows to get the filelog "as is" so that its contents are fetched only
+    on update.
+
+    Returns whatever the wrapped command returns.
+    """
+    transformmap[revlog.REVIDX_ISLARGEFILE] = returnmetadata
+    try:
+        return orig(ui, repo, source=source, **opts)
+    finally:
+        # Restore the default transform even when the command fails.
+        transformmap[revlog.REVIDX_ISLARGEFILE] = readfromstore
+
+def push(orig, ui, repo, dest=None, **opts):
+    """Push command wrapper.
+
+    This wrapper overrides the flagprocessor transform for
+    revlog.REVIDX_ISLARGEFILE to return the actual filelog and bypass checkhash
+    as it contains metadata in the case of largefiles.
+
+    This allows to push metadata to the remote in place of large blobs.
+
+    Returns whatever the wrapped command returns.
+    """
+    transformmap[revlog.REVIDX_ISLARGEFILE] = returnmetadata
+    try:
+        return orig(ui, repo, dest=dest, **opts)
+    finally:
+        # Restore the default transform even when the command fails.
+        transformmap[revlog.REVIDX_ISLARGEFILE] = readfromstore
+
+def debugdata(orig, ui, repo, file_, rev=None, **opts):
+    """Debugdata command wrapper.
+
+    This wrapper overrides the flagprocessor transform for
+    revlog.REVIDX_ISLARGEFILE to return the actual filelog and bypass checkhash
+    as it contains metadata in the case of largefiles.
+
+    This ensures debugdata can perform its role of dumping the actual contents
+    of the filelog.
+
+    Returns whatever the wrapped command returns.
+    """
+    transformmap[revlog.REVIDX_ISLARGEFILE] = returnmetadata
+    try:
+        return orig(ui, repo, file_, rev=rev, **opts)
+    finally:
+        # Restore the default transform even when the command fails.
+        transformmap[revlog.REVIDX_ISLARGEFILE] = readfromstore
+
+def prepush(pushop):
+    """Prepush hook.
+
+    Read through the revisions to push, looking for filelog entries that can be
+    deserialized into metadata so that we can block the push on their upload to
+    the remote blobstore.
+    """
+    repo = pushop.repo
+    # Not every push destination gives access to a local repository: only
+    # touch the other side's store options when there is one.
+    remoterepo = pushop.remote.local()
+
+    # We need to pass on the information to the remote about the threshold so
+    # that _peek_islargefile can mark the file as large file.
+    threshold = repo.svfs.options.get('lfsthreshold')
+    if remoterepo is not None and threshold is not None:
+        remoterepo.svfs.options['lfsthreshold'] = threshold
+
+    oidstoupload = []
+    for n in pushop.outgoing.missing:
+        ctx = repo[n]
+        files = set(ctx.files())
+        # Compare node ids: a context object never equals the null node id.
+        parents = [p for p in ctx.parents() if p.node() != node.nullid]
+        if len(parents) == 2:
+            # For merges, also look at every file that differs from either
+            # parent manifest.
+            mc = ctx.manifest()
+            mp1 = parents[0].manifest()
+            mp2 = parents[1].manifest()
+            for f in mp1:
+                if f not in mc:
+                    files.add(f)
+            for f in mp2:
+                if f not in mc:
+                    files.add(f)
+            for f in mc:
+                if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+                    files.add(f)
+
+        for f in files:
+            try:
+                metadata = pointer.deserialize(ctx[f].data())
+                oidstoupload.append(metadata)
+            except Exception:
+                # Best effort on purpose: anything that cannot be read and
+                # deserialized as a pointer has no blob to upload.
+                pass
+
+    remoteblob = blobstore.remote.get(repo.svfs)
+    remoteblob.writebatch(oidstoupload, blobstore.local.get(repo.svfs))
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -112,6 +112,10 @@ 
                 raise error.CensoredNodeError(self.indexfile, node, text)
             raise
 
+    def islargefile(self, rev):
+        """Check if a file revision is large.
+
+        Returns the REVIDX_ISLARGEFILE bit of the revision flags: a non-zero
+        value when the flag is set, 0 otherwise.
+        """
+        return self.flags(rev) & revlog.REVIDX_ISLARGEFILE
+
     def iscensored(self, rev):
         """Check if a file revision is censored."""
         return self.flags(rev) & revlog.REVIDX_ISCENSORED
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -54,8 +54,9 @@ 
 
 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
+REVIDX_ISLARGEFILE = (1 << 14)
 REVIDX_DEFAULT_FLAGS = 0
-REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
+REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED | REVIDX_ISLARGEFILE
 
 # max size of revlog with inline data
 _maxinline = 131072
@@ -95,6 +96,7 @@ 
     def __init__(self, revlogobject):
         self.flagsbypriority = [
             REVIDX_ISCENSORED,
+            REVIDX_ISLARGEFILE,
         ]
         self.transformmap = {}
         self.revlogobject = revlogobject
@@ -1770,10 +1772,16 @@ 
         """Check if a file revision is censored."""
         return False
 
+    def islargefile(self, rev):
+        """Check if a revision is a large file; always False here."""
+        return False
+
     def _peek_iscensored(self, baserev, delta, flush):
         """Quickly check if a delta produces a censored revision."""
         return False
 
+    def _peek_islargefile(self, length):
+        """Check from ``length`` if content is a large file; always False
+        here."""
+        return False
+
     def getstrippoint(self, minlink):
         """find the minimum rev that must be stripped to strip the linkrev
 
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
new file mode 100644
--- /dev/null
+++ b/tests/test-lfs.t
@@ -0,0 +1,121 @@ 
+  $ hg init server
+  $ cd server
+
+# require changegroup3
+  $ cat >> .hg/requires << EOF
+  > changegroup3
+  > EOF
+
+# Setup extension
+  $ cat >> .hg/hgrc << EOF
+  > [lfs]
+  > threshold=1000B
+  > EOF
+
+# Clone server and enable extension
+  $ cd ..
+  $ hg clone -q server client
+  $ cd client
+  $ cat >> .hg/hgrc <<EOF
+  > [extensions]
+  > lfs=$TESTDIR/../hgext/lfs/
+  > [lfs]
+  > threshold=1000B
+  > blobstore=cache/localblobstore
+  > EOF
+
+# Commit small file
+  $ echo s > smallfile
+  $ hg commit -Aqm "add small file"
+  $ hg debugdata smallfile 0
+  s
+
+# Commit large file
+  $ echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA > largefile
+  $ hg commit -Aqm "add large file"
+
+# Check the blobstore is populated
+  $ find .hg/cache/localblobstore
+  .hg/cache/localblobstore
+  .hg/cache/localblobstore/12
+  .hg/cache/localblobstore/12/28f8759b018cca5b58d3e5d1740d0b827f06cecf8868fd17f35a87ef8aacf6
+
+# Check the blob stored contains the actual contents of the file
+  $ cat .hg/cache/localblobstore/12/28f8759b018cca5b58d3e5d1740d0b827f06cecf8868fd17f35a87ef8aacf6
+  AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
+# Check the filelog contains metadata
+  $ hg debugdata largefile 0
+  version https://git-lfs.github.com/spec/v1
+  oid sha256:1228f8759b018cca5b58d3e5d1740d0b827f06cecf8868fd17f35a87ef8aacf6
+  size 1501
+
+# Check the contents of the file are fetched from the blobstore when requested
+  $ hg cat -r . largefile
+  AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
+# Push the blobs to the server
+  $ hg push
+  pushing to $TESTTMP/server (glob)
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 2 files
+
+# Initialize new client (not cloning) and setup extension
+  $ cd ../
+  $ mkdir client2
+  $ cd client2
+  $ hg init
+  $ cat >> .hg/requires << EOF
+  > changegroup3
+  > EOF
+  $ cat >> .hg/hgrc <<EOF
+  > [paths]
+  > default = $TESTTMP/server
+  > [extensions]
+  > lfs=$TESTDIR/../hgext/lfs/
+  > [lfs]
+  > threshold=1000B
+  > blobstore=cache/localblobstore
+  > EOF
+
+# Pull from server
+  $ hg pull default
+  pulling from $TESTTMP/server (glob)
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 2 files
+  (run 'hg update' to get a working copy)
+
+# Check the blobstore is not yet populated
+  $ [ -e .hg/cache/localblobstore ]
+  [1]
+
+# Update to the last revision containing the large file
+  $ hg update
+  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+
+# Check the filelog contents actually contain metadata
+  $ hg debugdata largefile 0
+  version https://git-lfs.github.com/spec/v1
+  oid sha256:1228f8759b018cca5b58d3e5d1740d0b827f06cecf8868fd17f35a87ef8aacf6
+  size 1501
+
+# Check the blobstore has been populated on update
+  $ find .hg/cache/localblobstore
+  .hg/cache/localblobstore
+  .hg/cache/localblobstore/12
+  .hg/cache/localblobstore/12/28f8759b018cca5b58d3e5d1740d0b827f06cecf8868fd17f35a87ef8aacf6
+
+# Check the contents of the file are fetched from blobstore when requested
+  $ hg cat -r . largefile
+  AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
+# Check the file has been copied in the working copy
+  $ cat largefile
+  AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+