Patchwork [2,of,2,V2] hgweb: add a hook for processing LFS file transfer requests

login
register
mail settings
Submitter Matt Harbison
Date March 8, 2018, 4:49 a.m.
Message ID <5544688dec388a7c8a98.1520484541@Envy>
Download mbox | patch
Permalink /patch/29113/
State New
Headers show

Comments

Matt Harbison - March 8, 2018, 4:49 a.m.
# HG changeset patch
# User Matt Harbison <matt_harbison@yahoo.com>
# Date 1519275362 18000
#      Wed Feb 21 23:56:02 2018 -0500
# Node ID 5544688dec388a7c8a988c074aab659f059f549f
# Parent  89382cb20bb19e089513b2ce69ef8acfa1f523fd
hgweb: add a hook for processing LFS file transfer requests

As part of this, the PUT request needs to be handled to upload files.  Unlike
the requests to the Batch API, this URI is internally controlled, and provided
to the client in the Batch API.  So without any interoperability concerns, the
URI starts with '/.hg', and reflects where the files are actually stored.

The permission check is deferred to the processing function, because this
request is used for both uploads and downloads.
Gregory Szorc - March 8, 2018, 6:22 a.m.
On Wed, Mar 7, 2018 at 8:49 PM, Matt Harbison <mharbison72@gmail.com> wrote:

> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1519275362 18000
> #      Wed Feb 21 23:56:02 2018 -0500
> # Node ID 5544688dec388a7c8a988c074aab659f059f549f
> # Parent  89382cb20bb19e089513b2ce69ef8acfa1f523fd
> hgweb: add a hook for processing LFS file transfer requests
>

> As part of this, the PUT request needs to be handled to upload files.
> Unlike
> the requests to the Batch API, this URI is internally controlled, and
> provided
> to the client in the Batch API.  So without any interoperability concerns,
> the
> URI starts with '/.hg', and reflects where the files are actually stored.
>
> The permission check is deferred to the processing function, because this
> request is used for both uploads and downloads.
>
> diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
> --- a/mercurial/hgweb/hgweb_mod.py
> +++ b/mercurial/hgweb/hgweb_mod.py
> @@ -96,6 +96,15 @@
>      """
>      raise ErrorResponse(HTTP_NOT_FOUND)
>
> +def _processlfstransfer(repo, req):
> +    """A hook for the LFS extension to wrap that handles file transfer
> requests.
> +
> +    The caller MUST call ``req.checkperm()`` with 'push' or 'pull' after
> it
> +    determines whether this is an upload or a download, prior to
> accessing any
> +    repository data.
> +    """
> +    raise ErrorResponse(HTTP_NOT_FOUND)
> +
>  class requestcontext(object):
>      """Holds state/context for an individual request.
>
> @@ -382,14 +391,20 @@
>              except ErrorResponse as inst:
>                  return protohandler['handleerror'](inst)
>
> -        # Route LFS Batch API requests to the appropriate handler
> +        # Route LFS Batch API and transfer requests to the appropriate
> handler
>
>          if req.env.get(r'HTTP_USER_AGENT', r'').startswith(r'git-lfs/'):
>              try:
> -                path = req.env.get(r'PATH_INFO')
> +                path = req.env.get(r'PATH_INFO', '')
>                  if path == '/.git/info/lfs/objects/batch':
>                      self.check_perm(rctx, req, None)
>                      return _processlfsbatchreq(rctx.repo, req)
> +                elif path.startswith('/.hg/store/lfs/objects'):
>

This scares me a bit because we're leaking internal storage paths into the
URI space. Must we do this? What's relying on this behavior?


> +                    # NB: This function is responsible for doing the
> appropriate
> +                    # permission checks after determining if this is an
> upload
> +                    # or a download.
> +                    req.checkperm = lambda op: self.check_perm(rctx, req,
> op)
> +                    return _processlfstransfer(rctx.repo, req)
>                  else:
>                      raise ErrorResponse(HTTP_NOT_FOUND)
>              except ErrorResponse as inst:
> diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py
> --- a/mercurial/hgweb/server.py
> +++ b/mercurial/hgweb/server.py
> @@ -111,6 +111,9 @@
>              self.log_error(r"Exception happened during processing "
>                             r"request '%s':%s%s", self.path, newline, tb)
>
> +    def do_PUT(self):
> +        self.do_POST()
> +
>      def do_GET(self):
>          self.do_POST()
>
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
Matt Harbison - March 8, 2018, 12:39 p.m.
> On Mar 8, 2018, at 1:22 AM, Gregory Szorc <gregory.szorc@gmail.com> wrote:
> 
>> On Wed, Mar 7, 2018 at 8:49 PM, Matt Harbison <mharbison72@gmail.com> wrote:
>> # HG changeset patch
>> # User Matt Harbison <matt_harbison@yahoo.com>
>> # Date 1519275362 18000
>> #      Wed Feb 21 23:56:02 2018 -0500
>> # Node ID 5544688dec388a7c8a988c074aab659f059f549f
>> # Parent  89382cb20bb19e089513b2ce69ef8acfa1f523fd
>> hgweb: add a hook for processing LFS file transfer requests 
>> 
>> As part of this, the PUT request needs to be handled to upload files.  Unlike
>> the requests to the Batch API, this URI is internally controlled, and provided
>> to the client in the Batch API.  So without any interoperability concerns, the
>> URI starts with '/.hg', and reflects where the files are actually stored.
>> 
>> The permission check is deferred to the processing function, because this
>> request is used for both uploads and downloads.
>> 
>> diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
>> --- a/mercurial/hgweb/hgweb_mod.py
>> +++ b/mercurial/hgweb/hgweb_mod.py
>> @@ -96,6 +96,15 @@
>>      """
>>      raise ErrorResponse(HTTP_NOT_FOUND)
>> 
>> +def _processlfstransfer(repo, req):
>> +    """A hook for the LFS extension to wrap that handles file transfer requests.
>> +
>> +    The caller MUST call ``req.checkperm()`` with 'push' or 'pull' after it
>> +    determines whether this is an upload or a download, prior to accessing any
>> +    repository data.
>> +    """
>> +    raise ErrorResponse(HTTP_NOT_FOUND)
>> +
>>  class requestcontext(object):
>>      """Holds state/context for an individual request.
>> 
>> @@ -382,14 +391,20 @@
>>              except ErrorResponse as inst:
>>                  return protohandler['handleerror'](inst)
>> 
>> -        # Route LFS Batch API requests to the appropriate handler
>> +        # Route LFS Batch API and transfer requests to the appropriate handler
>> 
>>          if req.env.get(r'HTTP_USER_AGENT', r'').startswith(r'git-lfs/'):
>>              try:
>> -                path = req.env.get(r'PATH_INFO')
>> +                path = req.env.get(r'PATH_INFO', '')
>>                  if path == '/.git/info/lfs/objects/batch':
>>                      self.check_perm(rctx, req, None)
>>                      return _processlfsbatchreq(rctx.repo, req)
>> +                elif path.startswith('/.hg/store/lfs/objects'):
> 
> This scares me a bit because we're leaking internal storage paths into the URI space. Must we do this? What's relying on this behavior?

I don’t think anything relies on this.  It’s just a matter of picking a URI that can never be a tracked item, path to a subrepo, path in a webconf file, or anything in the wire protocol domain.  Starting with '.hg' seemed safest from that POV.

I’m open to suggestions.  We could keep the .git prefix, but that seems weird. I only did that in the first patch for compatibility with git servers, and auto detecting the URL.

>> +                    # NB: This function is responsible for doing the appropriate
>> +                    # permission checks after determining if this is an upload
>> +                    # or a download.
>> +                    req.checkperm = lambda op: self.check_perm(rctx, req, op)
>> +                    return _processlfstransfer(rctx.repo, req)
>>                  else:
>>                      raise ErrorResponse(HTTP_NOT_FOUND)
>>              except ErrorResponse as inst:
>> diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py
>> --- a/mercurial/hgweb/server.py
>> +++ b/mercurial/hgweb/server.py
>> @@ -111,6 +111,9 @@
>>              self.log_error(r"Exception happened during processing "
>>                             r"request '%s':%s%s", self.path, newline, tb)
>> 
>> +    def do_PUT(self):
>> +        self.do_POST()
>> +
>>      def do_GET(self):
>>          self.do_POST()
>> 
>> _______________________________________________
>> Mercurial-devel mailing list
>> Mercurial-devel@mercurial-scm.org
>> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

Patch

diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -96,6 +96,15 @@ 
     """
     raise ErrorResponse(HTTP_NOT_FOUND)
 
+def _processlfstransfer(repo, req):
+    """A hook for the LFS extension to wrap that handles file transfer requests.
+
+    The caller MUST call ``req.checkperm()`` with 'push' or 'pull' after it
+    determines whether this is an upload or a download, prior to accessing any
+    repository data.
+    """
+    raise ErrorResponse(HTTP_NOT_FOUND)
+
 class requestcontext(object):
     """Holds state/context for an individual request.
 
@@ -382,14 +391,20 @@ 
             except ErrorResponse as inst:
                 return protohandler['handleerror'](inst)
 
-        # Route LFS Batch API requests to the appropriate handler
+        # Route LFS Batch API and transfer requests to the appropriate handler
 
         if req.env.get(r'HTTP_USER_AGENT', r'').startswith(r'git-lfs/'):
             try:
-                path = req.env.get(r'PATH_INFO')
+                path = req.env.get(r'PATH_INFO', '')
                 if path == '/.git/info/lfs/objects/batch':
                     self.check_perm(rctx, req, None)
                     return _processlfsbatchreq(rctx.repo, req)
+                elif path.startswith('/.hg/store/lfs/objects'):
+                    # NB: This function is responsible for doing the appropriate
+                    # permission checks after determining if this is an upload
+                    # or a download.
+                    req.checkperm = lambda op: self.check_perm(rctx, req, op)
+                    return _processlfstransfer(rctx.repo, req)
                 else:
                     raise ErrorResponse(HTTP_NOT_FOUND)
             except ErrorResponse as inst:
diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py
--- a/mercurial/hgweb/server.py
+++ b/mercurial/hgweb/server.py
@@ -111,6 +111,9 @@ 
             self.log_error(r"Exception happened during processing "
                            r"request '%s':%s%s", self.path, newline, tb)
 
+    def do_PUT(self):
+        self.do_POST()
+
     def do_GET(self):
         self.do_POST()