Patchwork [5,of,6] lfs: add support for serving blob files

login
register
mail settings
Submitter Matt Harbison
Date March 19, 2018, 4:08 a.m.
Message ID <7901a210c0c3c4a1f84f.1521432508@Envy>
Download mbox | patch
Permalink /patch/29602/
State Accepted
Headers show

Comments

Matt Harbison - March 19, 2018, 4:08 a.m.
# HG changeset patch
# User Matt Harbison <matt_harbison@yahoo.com>
# Date 1521266024 14400
#      Sat Mar 17 01:53:44 2018 -0400
# Node ID 7901a210c0c3c4a1f84fd21ff6e7c9b29454d6bc
# Parent  a21db2355b92a6725ec51cd853d44a511a569bb7
lfs: add support for serving blob files
Yuya Nishihara - March 29, 2018, 12:35 p.m.
On Mon, 19 Mar 2018 00:08:28 -0400, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1521266024 14400
> #      Sat Mar 17 01:53:44 2018 -0400
> # Node ID 7901a210c0c3c4a1f84fd21ff6e7c9b29454d6bc
> # Parent  a21db2355b92a6725ec51cd853d44a511a569bb7
> lfs: add support for serving blob files
> 
> diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
> --- a/hgext/lfs/wireprotolfsserver.py
> +++ b/hgext/lfs/wireprotolfsserver.py
> @@ -10,6 +10,7 @@ from __future__ import absolute_import
>  import datetime
>  import errno
>  import json
> +import os
>  
>  from mercurial.hgweb import (
>      common as hgwebcommon,
> @@ -25,6 +26,7 @@ from . import (
>  )
>  
>  HTTP_OK = hgwebcommon.HTTP_OK
> +HTTP_CREATED = hgwebcommon.HTTP_CREATED
>  HTTP_BAD_REQUEST = hgwebcommon.HTTP_BAD_REQUEST
>  
>  def handlewsgirequest(orig, rctx, req, res, checkperm):
> @@ -242,10 +244,46 @@ def _processbasictransfer(repo, req, res
>      """
>  
>      method = req.method
> +    oid = os.path.basename(req.dispatchpath)

Nit: os.path.basename() shouldn't be used here because dispatchpath isn't a
file path. Perhaps we should reject paths other than '.hg/lfs/objects/{oid}'.

>      elif method == b'GET':
>          checkperm('pull')
>  
> -    return False
> +        res.status = hgwebcommon.statusmessage(HTTP_OK)
> +        res.headers[b'Content-Type'] = b'application/octet-stream'
> +
> +        # TODO: figure out how to send back the file in chunks, instead of
> +        #       reading the whole thing.
> +        res.setbodybytes(localstore.read(oid))

setbodygen()?
Matt Harbison - March 30, 2018, 3:26 a.m.
On Thu, 29 Mar 2018 08:35:19 -0400, Yuya Nishihara <yuya@tcha.org> wrote:

> On Mon, 19 Mar 2018 00:08:28 -0400, Matt Harbison wrote:
>> # HG changeset patch
>> # User Matt Harbison <matt_harbison@yahoo.com>
>> # Date 1521266024 14400
>> #      Sat Mar 17 01:53:44 2018 -0400
>> # Node ID 7901a210c0c3c4a1f84fd21ff6e7c9b29454d6bc
>> # Parent  a21db2355b92a6725ec51cd853d44a511a569bb7
>> lfs: add support for serving blob files
>>
>> diff --git a/hgext/lfs/wireprotolfsserver.py  
>> b/hgext/lfs/wireprotolfsserver.py
>> --- a/hgext/lfs/wireprotolfsserver.py
>> +++ b/hgext/lfs/wireprotolfsserver.py
>> @@ -10,6 +10,7 @@ from __future__ import absolute_import
>>  import datetime
>>  import errno
>>  import json
>> +import os
>>
>>  from mercurial.hgweb import (
>>      common as hgwebcommon,
>> @@ -25,6 +26,7 @@ from . import (
>>  )
>>
>>  HTTP_OK = hgwebcommon.HTTP_OK
>> +HTTP_CREATED = hgwebcommon.HTTP_CREATED
>>  HTTP_BAD_REQUEST = hgwebcommon.HTTP_BAD_REQUEST
>>
>>  def handlewsgirequest(orig, rctx, req, res, checkperm):
>> @@ -242,10 +244,46 @@ def _processbasictransfer(repo, req, res
>>      """
>>
>>      method = req.method
>> +    oid = os.path.basename(req.dispatchpath)
>
> Nit: os.path.basename() shouldn't be used here because dispatchpath  
> isn't a
> file path. Perhaps we should reject paths other than  
> '.hg/lfs/objects/{oid}'.

Good point.  The path check can't be too strict though, because I think  
--prefix alters that.  As it is, I can cause an assertion error with  
certain --prefix values on default.

>
>>      elif method == b'GET':
>>          checkperm('pull')
>>
>> -    return False
>> +        res.status = hgwebcommon.statusmessage(HTTP_OK)
>> +        res.headers[b'Content-Type'] = b'application/octet-stream'
>> +
>> +        # TODO: figure out how to send back the file in chunks,  
>> instead of
>> +        #       reading the whole thing.
>> +        res.setbodybytes(localstore.read(oid))
>
> setbodygen()?

I saw that, but then it isn't clear to me how to close the file  
descriptor, both in the normal and exceptional cases.
Yuya Nishihara - March 30, 2018, 12:12 p.m.
On Thu, 29 Mar 2018 23:26:03 -0400, Matt Harbison wrote:
> >>      elif method == b'GET':
> >>          checkperm('pull')
> >>
> >> -    return False
> >> +        res.status = hgwebcommon.statusmessage(HTTP_OK)
> >> +        res.headers[b'Content-Type'] = b'application/octet-stream'
> >> +
> >> +        # TODO: figure out how to send back the file in chunks,  
> >> instead of
> >> +        #       reading the whole thing.
> >> +        res.setbodybytes(localstore.read(oid))
> >
> > setbodygen()?
> 
> I saw that, but then it isn't clear to me how to close the file  
> descriptor, both in the normal and exceptional cases.

WSGI spec says the returned "iterator" may have close().

https://www.python.org/dev/peps/pep-0333/

Perhaps we'll have to fix the request object and hgweb to handle that, and
add a file-like wrapper which can be used in place of filechunkiter().

Patch

diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py
+++ b/hgext/lfs/wireprotolfsserver.py
@@ -10,6 +10,7 @@  from __future__ import absolute_import
 import datetime
 import errno
 import json
+import os
 
 from mercurial.hgweb import (
     common as hgwebcommon,
@@ -25,6 +26,7 @@  from . import (
 )
 
 HTTP_OK = hgwebcommon.HTTP_OK
+HTTP_CREATED = hgwebcommon.HTTP_CREATED
 HTTP_BAD_REQUEST = hgwebcommon.HTTP_BAD_REQUEST
 
 def handlewsgirequest(orig, rctx, req, res, checkperm):
@@ -242,10 +244,46 @@  def _processbasictransfer(repo, req, res
     """
 
     method = req.method
+    oid = os.path.basename(req.dispatchpath)
+    localstore = repo.svfs.lfslocalblobstore
 
     if method == b'PUT':
         checkperm('upload')
+
+        # TODO: verify Content-Type?
+
+        existed = localstore.has(oid)
+
+        # TODO: how to handle timeouts?  The body proxy handles limiting to
+        #       Content-Length, but what happens if a client sends less than it
+        #       says it will?
+
+        # TODO: download() will abort if the checksum fails.  It should raise
+        #       something checksum specific that can be caught here, and turned
+        #       into an http code.
+        localstore.download(oid, req.bodyfh)
+
+        statusmessage = hgwebcommon.statusmessage
+        res.status = statusmessage(HTTP_OK if existed else HTTP_CREATED)
+
+        # There's no payload here, but this is the header that lfs-test-server
+        # sends back.  This eliminates some gratuitous test output conditionals.
+        res.headers[b'Content-Type'] = b'text/plain; charset=utf-8'
+        res.setbodybytes(b'')
+
+        return True
     elif method == b'GET':
         checkperm('pull')
 
-    return False
+        res.status = hgwebcommon.statusmessage(HTTP_OK)
+        res.headers[b'Content-Type'] = b'application/octet-stream'
+
+        # TODO: figure out how to send back the file in chunks, instead of
+        #       reading the whole thing.
+        res.setbodybytes(localstore.read(oid))
+
+        return True
+    else:
+        _sethttperror(res, HTTP_BAD_REQUEST,
+                      message=b'Unsupported LFS transfer method: %s' % method)
+        return True
diff --git a/mercurial/hgweb/common.py b/mercurial/hgweb/common.py
--- a/mercurial/hgweb/common.py
+++ b/mercurial/hgweb/common.py
@@ -23,6 +23,7 @@  from .. import (
 httpserver = util.httpserver
 
 HTTP_OK = 200
+HTTP_CREATED = 201
 HTTP_NOT_MODIFIED = 304
 HTTP_BAD_REQUEST = 400
 HTTP_UNAUTHORIZED = 401