Patchwork [4,of,6] lfs: add server side support for the Batch API

login
register
mail settings
Submitter Matt Harbison
Date March 19, 2018, 4:08 a.m.
Message ID <a21db2355b92a6725ec5.1521432507@Envy>
Download mbox | patch
Permalink /patch/29605/
State Accepted
Headers show

Comments

Matt Harbison - March 19, 2018, 4:08 a.m.
# HG changeset patch
# User Matt Harbison <matt_harbison@yahoo.com>
# Date 1521265677 14400
#      Sat Mar 17 01:47:57 2018 -0400
# Node ID a21db2355b92a6725ec51cd853d44a511a569bb7
# Parent  b3d23eed96ea829a4b201f6857cb3195fc308aca
lfs: add server side support for the Batch API

The request.py change was borrowed from D2849.
Yuya Nishihara - March 29, 2018, 12:27 p.m.
On Mon, 19 Mar 2018 00:08:27 -0400, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1521265677 14400
> #      Sat Mar 17 01:47:57 2018 -0400
> # Node ID a21db2355b92a6725ec51cd853d44a511a569bb7
> # Parent  b3d23eed96ea829a4b201f6857cb3195fc308aca
> lfs: add server side support for the Batch API

> +from . import (
> +    blobstore,
> +    pointer,
> +)

Dropped unused import.

> +        try:
> +            verifies = store.verify(oid)
> +        except IOError as inst:
> +            if inst.errno != errno.ENOENT:
> +                rsp['error'] = {
> +                    'code': 500,
> +                    'message': e.strerror or 'Internal Server Server'

s/e/inst/

> --- a/mercurial/hgweb/request.py
> +++ b/mercurial/hgweb/request.py
> @@ -325,6 +325,9 @@ def parserequestfromenv(env, reponame=No
>      if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
>          headers['Content-Length'] = env['CONTENT_LENGTH']
>  
> +    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
> +        headers['Content-Type'] = env['CONTENT_TYPE']

Dropped this as it is already in.

Patch

diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py
+++ b/hgext/lfs/wireprotolfsserver.py
@@ -7,6 +7,10 @@ 
 
 from __future__ import absolute_import
 
+import datetime
+import errno
+import json
+
 from mercurial.hgweb import (
     common as hgwebcommon,
 )
@@ -15,6 +19,14 @@  from mercurial import (
     pycompat,
 )
 
+from . import (
+    blobstore,
+    pointer,
+)
+
+HTTP_OK = hgwebcommon.HTTP_OK
+HTTP_BAD_REQUEST = hgwebcommon.HTTP_BAD_REQUEST
+
 def handlewsgirequest(orig, rctx, req, res, checkperm):
     """Wrap wireprotoserver.handlewsgirequest() to possibly process an LFS
     request if it is left unprocessed by the wrapped method.
@@ -46,13 +58,177 @@  def handlewsgirequest(orig, rctx, req, r
         res.setbodybytes(b'0\n%s\n' % pycompat.bytestr(e))
         return True
 
+def _sethttperror(res, code, message=None):
+    res.status = hgwebcommon.statusmessage(code, message=message)
+    res.headers[b'Content-Type'] = b'text/plain; charset=utf-8'
+    res.setbodybytes(b'')
+
 def _processbatchrequest(repo, req, res):
     """Handle a request for the Batch API, which is the gateway to granting file
     access.
 
     https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
     """
-    return False
+
+    # Mercurial client request:
+    #
+    #   HOST: localhost:$HGPORT
+    #   ACCEPT: application/vnd.git-lfs+json
+    #   ACCEPT-ENCODING: identity
+    #   USER-AGENT: git-lfs/2.3.4 (Mercurial 4.5.2+1114-f48b9754f04c+20180316)
+    #   Content-Length: 125
+    #   Content-Type: application/vnd.git-lfs+json
+    #
+    #   {
+    #     "objects": [
+    #       {
+    #         "oid": "31cf...8e5b",
+    #         "size": 12
+    #       }
+    #     ],
+    #     "operation": "upload"
+    #   }
+
+    if (req.method != b'POST'
+        or req.headers[b'Content-Type'] != b'application/vnd.git-lfs+json'
+        or req.headers[b'Accept'] != b'application/vnd.git-lfs+json'):
+        # TODO: figure out what the proper handling for a bad request to the
+        #       Batch API is.
+        _sethttperror(res, HTTP_BAD_REQUEST, b'Invalid Batch API request')
+        return True
+
+    # XXX: specify an encoding?
+    lfsreq = json.loads(req.bodyfh.read())
+
+    # If no transfer handlers are explicitly requested, 'basic' is assumed.
+    if 'basic' not in lfsreq.get('transfers', ['basic']):
+        _sethttperror(res, HTTP_BAD_REQUEST,
+                      b'Only the basic LFS transfer handler is supported')
+        return True
+
+    operation = lfsreq.get('operation')
+    if operation not in ('upload', 'download'):
+        _sethttperror(res, HTTP_BAD_REQUEST,
+                      b'Unsupported LFS transfer operation: %s' % operation)
+        return True
+
+    localstore = repo.svfs.lfslocalblobstore
+
+    objects = [p for p in _batchresponseobjects(req, lfsreq.get('objects', []),
+                                                operation, localstore)]
+
+    rsp = {
+        'transfer': 'basic',
+        'objects': objects,
+    }
+
+    res.status = hgwebcommon.statusmessage(HTTP_OK)
+    res.headers[b'Content-Type'] = b'application/vnd.git-lfs+json'
+    res.setbodybytes(pycompat.bytestr(json.dumps(rsp)))
+
+    return True
+
+def _batchresponseobjects(req, objects, action, store):
+    """Yield one dictionary of attributes for the Batch API response for each
+    object in the list.
+
+    req: The parsedrequest for the Batch API request
+    objects: The list of objects in the Batch API object request list
+    action: 'upload' or 'download'
+    store: The local blob store for servicing requests"""
+
+    # Successful lfs-test-server response to solicit an upload:
+    # {
+    #    u'objects': [{
+    #       u'size': 12,
+    #       u'oid': u'31cf...8e5b',
+    #       u'actions': {
+    #           u'upload': {
+    #               u'href': u'http://localhost:$HGPORT/objects/31cf...8e5b',
+    #               u'expires_at': u'0001-01-01T00:00:00Z',
+    #               u'header': {
+    #                   u'Accept': u'application/vnd.git-lfs'
+    #               }
+    #           }
+    #       }
+    #    }]
+    # }
+
+    # TODO: Sort out the expires_at/expires_in/authenticated keys.
+
+    for obj in objects:
+        # Convert unicode to ASCII to create a filesystem path
+        oid = obj.get('oid').encode('ascii')
+        rsp = {
+            'oid': oid,
+            'size': obj.get('size'),  # XXX: should this check the local size?
+            #'authenticated': True,
+        }
+
+        exists = True
+        verifies = False
+
+        # Verify an existing file on the upload request, so that the client is
+        # solicited to re-upload if it's corrupt locally.  Download requests are
+        # also verified, so the error can be flagged in the Batch API response.
+        # (Maybe we can use this to short circuit the download for `hg verify`,
+        # IFF the client can assert that the remote end is an hg server.)
+        # Otherwise, it's potentially overkill on download, since it is also
+        # verified as the file is streamed to the caller.
+        try:
+            verifies = store.verify(oid)
+        except IOError as inst:
+            if inst.errno != errno.ENOENT:
+                rsp['error'] = {
+                    'code': 500,
+                    'message': e.strerror or 'Internal Server Server'
+                }
+                yield rsp
+                continue
+
+            exists = False
+
+        # Items are always listed for downloads.  They are dropped for uploads
+        # IFF they already exist locally.
+        if action == 'download':
+            if not exists:
+                rsp['error'] = {
+                    'code': 404,
+                    'message': "The object does not exist"
+                }
+                yield rsp
+                continue
+
+            elif not verifies:
+                rsp['error'] = {
+                    'code': 422,   # XXX: is this the right code?
+                    'message': "The object is corrupt"
+                }
+                yield rsp
+                continue
+
+        elif verifies:
+            yield rsp  # Skip 'actions': already uploaded
+            continue
+
+        expiresat = datetime.datetime.now() + datetime.timedelta(minutes=10)
+
+        rsp['actions'] = {
+            '%s' % action: {
+                # TODO: Account for the --prefix, if any.
+                'href': '%s/.hg/lfs/objects/%s' % (req.baseurl, oid),
+                # datetime.isoformat() doesn't include the 'Z' suffix
+                "expires_at": expiresat.strftime('%Y-%m-%dT%H:%M:%SZ'),
+                'header': {
+                    # The spec doesn't mention the Accept header here, but avoid
+                    # a gratuitous deviation from lfs-test-server in the test
+                    # output.
+                    'Accept': 'application/vnd.git-lfs'
+                }
+            }
+        }
+
+        yield rsp
 
 def _processbasictransfer(repo, req, res, checkperm):
     """Handle a single file upload (PUT) or download (GET) action for the Basic
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -325,6 +325,9 @@  def parserequestfromenv(env, reponame=No
     if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
         headers['Content-Length'] = env['CONTENT_LENGTH']
 
+    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
+        headers['Content-Type'] = env['CONTENT_TYPE']
+
     bodyfh = env['wsgi.input']
     if 'Content-Length' in headers:
         bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))